From ce3861e114555566e7f432e88b47becc1a0975fa Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 19 Jan 2026 12:46:18 -0800 Subject: [PATCH 01/17] ai draft --- README.md | 2 +- examples/QUICKSTART.md | 152 +++++++++++++++ examples/handpose_ros2_publisher.py | 279 ++++++++++++++++++++++++++++ pyproject.toml | 4 + scripts/test_integration_points.py | 137 ++++++++++++++ scripts/test_publisher_structure.py | 111 +++++++++++ 6 files changed, 684 insertions(+), 1 deletion(-) create mode 100644 examples/QUICKSTART.md create mode 100644 examples/handpose_ros2_publisher.py create mode 100644 scripts/test_integration_points.py create mode 100644 scripts/test_publisher_structure.py diff --git a/README.md b/README.md index 5178395..c234d74 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# HandPose +w# HandPose Real-time hand tracking and retargeting to ORCA robot hand using MediaPipe and MuJoCo. diff --git a/examples/QUICKSTART.md b/examples/QUICKSTART.md new file mode 100644 index 0000000..6b8905b --- /dev/null +++ b/examples/QUICKSTART.md @@ -0,0 +1,152 @@ +# Quick Start Guide: HandPose ROS2 Integration + +This guide provides a quick reference for using HandPose with the tactile_dex data collection pipeline. + +## Prerequisites + +1. ROS2 installed and sourced: `source /opt/ros//setup.bash` +2. HandPose installed: `cd HandPose && pip install -e .` +3. Camera connected and accessible + +## Three-Terminal Setup + +### Terminal 1: HandPose Publisher +```bash +cd HandPose +python examples/handpose_ros2_publisher.py \ + --camera 0 \ + --model models/orca_hand_fixed.mjcf \ + --scale 1.0 \ + --fps 30 \ + --viz # Optional: enable visualization window +``` + +**What it does:** +- Captures video from camera (default 1280x720) +- Detects hand using HaMeR tracker +- Performs IK retargeting to ORCA hand joints +- Publishes raw IK joint angles to `/joint_states` at 30 Hz +- **IMPORTANT**: Does NOT apply REF_OFFSETS (these are applied in tactile_dex) + +### Terminal 2: Simulation (Isaac Lab) +```bash +cd tactile_dex +./isaaclab.sh -p orca_tomato_replay.py \ + --delta_topic object_delta_world \ + --rot_topic object_rot_delta_world \ + --joint_topic /joint_states +``` + +**What it does:** +- Subscribes to `/joint_states` from HandPose publisher +- Applies REF_OFFSETS and REF_GAIN to joint values +- Clamps joints to URDF limits (with limit offsets) +- Applies special limits for `right_thumb_abd` (-20° to +42°) +- Visualizes hand and object in simulation +- Publishes: + - `/sim_joint_states`: Actual joint angles from simulation + - `/object_in_wrist`: Object pose relative to wrist frame + - `/teleop_joint_states_raw`: Raw values from HandPose (diagnostic) + - `/teleop_joint_states_postproc`: After offset+gain+clamp (diagnostic) + +### Terminal 3: Data Recording +```bash +cd tactile_dex +python record_hand_can_yolanda.py record next grasp_task --notes "vision_demo" +``` + +**What it does:** +- Records `/sim_joint_states` and `/object_in_wrist` to rosbag2 +- Creates episode directories with metadata + +## Verification + +Check that topics are being published: +```bash +# List all topics +ros2 topic list + +# Echo joint states +ros2 topic echo /joint_states + +# Check publishing rate +ros2 topic hz /joint_states +``` + +## Troubleshooting + +### Camera Issues + +**Camera not found:** +- Check device: `ls /dev/video*` (Linux) or check System Preferences (macOS) +- Try different index: `--camera 1` +- On macOS, you may need to grant camera permissions + +**No hand detected:** +- Ensure hand is fully visible in frame +- Check lighting conditions (avoid backlighting) +- Lower confidence threshold: `--conf-threshold 0.2` +- Try enabling visualization: `--viz` to see what the tracker sees + +**Low frame rate:** +- Reduce camera resolution: `--width 640 --height 480` +- Lower FPS: `--fps 15` (minimum 10 Hz recommended) +- Disable visualization if enabled +- Check CPU/GPU usage + +### ROS2 Communication Issues + +**Joint states not received:** +- Verify ROS2 is sourced: `source /opt/ros//setup.bash` +- Check topic exists: `ros2 topic list` +- Verify publisher is running: `ros2 topic hz /joint_states` +- Check topic names match: publisher uses `/joint_states`, simulator subscribes to `--joint_topic` + +**Topics not visible:** +- Ensure both nodes are in the same ROS2 domain (check `ROS_DOMAIN_ID`) +- Verify network configuration if running on different machines + +### Hand Pose Issues + +**Hand looks "pre-bent" or "twisted" at rest:** +- **This indicates offsets are being applied twice!** +- Check that HandPose publisher does NOT apply REF_OFFSETS (it should output raw IK) +- Verify `orca_tomato_replay.py` applies offsets exactly once + +**Hand barely moves or moves incorrectly:** +- Check that joint names match between publisher and simulator +- Verify scale factor (`--scale`) matches your hand size +- Enable diagnostic topics: `ros2 topic echo /teleop_joint_states_raw` +- Check for joint clamping warnings (enable with `LOG_JOINT_STATS=1`) + +**Jittery or unstable hand motion:** +- Enable smoothing: `--smoothing 0.3` (0.0 = no smoothing) +- Reduce camera resolution to improve frame rate +- Check that camera is stable and not shaking + +**Joint limits being hit frequently:** +- Enable joint stats: `LOG_JOINT_STATS=1 ./isaaclab.sh -p orca_tomato_replay.py ...` +- Check diagnostic topic: `ros2 topic echo /teleop_joint_states_postproc` +- Verify URDF limits are loaded correctly (check console output) + +### Integration Notes + +**Joint Offsets:** +- HandPose publisher outputs **raw IK joint angles** without offsets +- `orca_tomato_replay.py` applies REF_OFFSETS internally (as per PDF requirements) +- This prevents double-application of offsets + +**Joint Limits:** +- Limits are parsed from `orca_hand_model/new_hand1.urdf` +- Limit offsets are applied before clamping (same as REF_OFFSETS) +- Special case: `right_thumb_abd` uses custom limits (-20° to +42°) + +**Timestamps:** +- All JointState messages use ROS system time (`node.get_clock().now()`) +- Ensures proper synchronization across topics + +## Stopping + +Press `Ctrl+C` in each terminal to stop gracefully. + + diff --git a/examples/handpose_ros2_publisher.py b/examples/handpose_ros2_publisher.py new file mode 100644 index 0000000..5e6dabe --- /dev/null +++ b/examples/handpose_ros2_publisher.py @@ -0,0 +1,279 @@ +"""ROS2 publisher for HandPose tracking and IK retargeting. + +This script captures video from a camera, runs HaMeR hand tracking, +performs IK retargeting to ORCA hand joints, and publishes JointState +messages on /joint_states for use with the tactile_dex simulation pipeline. + +IMPORTANT: This publisher outputs raw IK joint angles WITHOUT applying +REF_OFFSETS. The tactile_dex simulation applies offsets internally. +""" + +import argparse +import sys +import time +from pathlib import Path + +import cv2 +import mujoco +import numpy as np +import rclpy +from rclpy.node import Node +from rclpy.qos import qos_profile_sensor_data +from sensor_msgs.msg import JointState + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from handpose.ik_retargeting import ORCA_JOINT_NAMES, ORCAHandIKConfig, ORCAHandIKRetargeting +from handpose.tracker.hamer import HaMeRTracker + + +class HandPoseROS2Publisher(Node): + """ROS2 node that publishes hand joint states from camera tracking.""" + + def __init__( + self, + camera: int = 0, + fps: int = 30, + model_path: str = "models/orca_hand_fixed.mjcf", + scale: float = 1.0, + viz: bool = False, + smoothing: float = 0.0, + conf_threshold: float = 0.3, + hold_last: bool = False, + width: int = 1280, + height: int = 720, + ) -> None: + """Initialize the HandPose ROS2 publisher. + + Args: + camera: Camera device index + fps: Target publishing rate (Hz) + model_path: Path to MuJoCo MJCF model file + scale: Scale factor for hand size (robot/human ratio) + viz: Enable OpenCV visualization window + smoothing: Exponential smoothing factor (0.0 = no smoothing) + conf_threshold: Confidence threshold for hand detection + hold_last: If True, publish last valid joint values when hand is not detected + width: Camera frame width + height: Camera frame height + """ + super().__init__("handpose_hamer_publisher") + + # ROS2 publisher + self.pub = self.create_publisher(JointState, "/joint_states", qos_profile_sensor_data) + + # Camera setup + self.cap = cv2.VideoCapture(camera) + if not self.cap.isOpened(): + raise RuntimeError(f"Failed to open camera {camera}") + + self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width) + self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height) + actual_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + actual_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + self.get_logger().info(f"Camera opened: {actual_width}x{actual_height}") + + # Hand tracker + self.tracker = HaMeRTracker(smoothing_factor=smoothing, conf_threshold=conf_threshold) + + # Load MuJoCo model + model_file = Path(model_path) + if not model_file.is_absolute(): + # Resolve relative to HandPose root + model_file = Path(__file__).parent.parent / model_path + if not model_file.exists(): + raise FileNotFoundError(f"Model file not found: {model_file}") + + self.get_logger().info(f"Loading MuJoCo model from {model_file}") + model = mujoco.MjModel.from_xml_path(str(model_file)) + + # IK retargeting + ik_cfg = ORCAHandIKConfig(scale_factor=scale) + self.ik = ORCAHandIKRetargeting(model, config=ik_cfg) + + # State tracking + self.last_joint_vals = None + self.hold_last = hold_last + self.viz = viz + + # Statistics + self.frame_count = 0 + self.hand_detected_count = 0 + self.last_stats_time = time.time() + self.publish_count = 0 + + # Timer for publishing + self.dt = 1.0 / fps + self.timer = self.create_timer(self.dt, self.tick) + + self.get_logger().info( + f"HandPose ROS2 Publisher initialized: " + f"camera={camera}, fps={fps}, model={model_file.name}, scale={scale}" + ) + + def tick(self) -> None: + """Timer callback: capture frame, track hand, publish joint states.""" + ok, frame = self.cap.read() + if not ok: + self.get_logger().warn("Failed to read camera frame") + return + + # Track hands + timestamp = time.time() + hands = self.tracker.detect_hands(frame, timestamp=timestamp) + + # Select best hand (prefer Right, otherwise highest confidence) + hand = None + if hands: + right_hands = [h for h in hands if h.handedness == "Right"] + if right_hands: + hand = max(right_hands, key=lambda h: h.confidence) + else: + hand = max(hands, key=lambda h: h.confidence) + + # Solve IK if hand detected + joint_vals = None + if hand is not None: + try: + qpos = self.ik.solve(hand) + joint_vals = qpos[self.ik.joint_indices] # Extract only ORCA joints + self.last_joint_vals = joint_vals.copy() + self.hand_detected_count += 1 + except Exception as e: + self.get_logger().warn(f"IK solve failed: {e}", throttle_duration_sec=1.0) + elif self.hold_last and self.last_joint_vals is not None: + # Use last valid values + joint_vals = self.last_joint_vals.copy() + + # Publish joint state + if joint_vals is not None: + msg = JointState() + msg.header.stamp = self.get_clock().now().to_msg() + msg.name = list(ORCA_JOINT_NAMES) + msg.position = [float(x) for x in joint_vals] + self.pub.publish(msg) + self.publish_count += 1 + + # Visualization + if self.viz and hand is not None: + vis_frame = self.tracker.visualize(frame, [hand]) + cv2.imshow("HandPose Tracking", vis_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + self.get_logger().info("Visualization window closed by user") + self.viz = False + cv2.destroyAllWindows() + + # Statistics logging (every 1 second) + self.frame_count += 1 + now = time.time() + if now - self.last_stats_time >= 1.0: + fps_actual = self.frame_count / (now - self.last_stats_time) + pub_rate = self.publish_count / (now - self.last_stats_time) + detection_rate = self.hand_detected_count / self.frame_count if self.frame_count > 0 else 0.0 + + status = "OK" if hand is not None else "NO HAND" + handedness = hand.handedness if hand is not None else "N/A" + conf = hand.confidence if hand is not None else 0.0 + + if joint_vals is not None: + joint_min = float(np.min(joint_vals)) + joint_max = float(np.max(joint_vals)) + joint_info = f"joints: [{joint_min:.3f}, {joint_max:.3f}]" + else: + joint_info = "joints: N/A" + + self.get_logger().info( + f"[stats] {status} | " + f"fps: {fps_actual:.1f} | " + f"pub_rate: {pub_rate:.1f} Hz | " + f"detection: {detection_rate:.1%} | " + f"hand: {handedness} ({conf:.2f}) | " + f"{joint_info}" + ) + + # Reset counters + self.frame_count = 0 + self.hand_detected_count = 0 + self.publish_count = 0 + self.last_stats_time = now + + def destroy_node(self) -> None: + """Cleanup on shutdown.""" + self.get_logger().info("Shutting down HandPose publisher...") + if self.cap.isOpened(): + self.cap.release() + if self.viz: + cv2.destroyAllWindows() + super().destroy_node() + + +def main() -> None: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="ROS2 publisher for HandPose tracking and IK retargeting", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("--camera", type=int, default=0, help="Camera device index") + parser.add_argument("--fps", type=int, default=30, help="Target publishing rate (Hz)") + parser.add_argument( + "--model", + type=str, + default="models/orca_hand_fixed.mjcf", + help="Path to MuJoCo MJCF model file", + ) + parser.add_argument("--scale", type=float, default=1.0, help="Hand scale factor (robot/human)") + parser.add_argument("--viz", action="store_true", help="Enable OpenCV visualization window") + parser.add_argument( + "--smoothing", + type=float, + default=0.0, + help="Exponential smoothing factor (0.0 = no smoothing)", + ) + parser.add_argument( + "--conf-threshold", + type=float, + default=0.3, + help="Confidence threshold for hand detection", + ) + parser.add_argument( + "--hold-last", + action="store_true", + help="Publish last valid joint values when hand is not detected", + ) + parser.add_argument("--width", type=int, default=1280, help="Camera frame width") + parser.add_argument("--height", type=int, default=720, help="Camera frame height") + + args = parser.parse_args() + + # Initialize ROS2 + rclpy.init() + + try: + node = HandPoseROS2Publisher( + camera=args.camera, + fps=args.fps, + model_path=args.model, + scale=args.scale, + viz=args.viz, + smoothing=args.smoothing, + conf_threshold=args.conf_threshold, + hold_last=args.hold_last, + width=args.width, + height=args.height, + ) + rclpy.spin(node) + node.destroy_node() + except KeyboardInterrupt: + print("\nShutting down...") + except Exception as e: + print(f"Error: {e}") + import traceback + + traceback.print_exc() + finally: + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 2fcd6d2..2dbfbea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,10 @@ module = [ "hamer.configs", "hamer.utils", "torch", + "rclpy", + 'sensor_msgs', + 'rclpy.*', + 'sensor_msgs.*', ] ignore_missing_imports = true diff --git a/scripts/test_integration_points.py b/scripts/test_integration_points.py new file mode 100644 index 0000000..6d6ffff --- /dev/null +++ b/scripts/test_integration_points.py @@ -0,0 +1,137 @@ +"""Test integration points between HandPose and tactile_dex.""" + +import sys +from pathlib import Path + +print("="*60) +print("Testing Integration Points") +print("="*60) + +# Test 1: Verify joint names match +print("\n1. Testing joint name consistency...") +try: + # From HandPose + sys.path.insert(0, str(Path(__file__).parent)) + from handpose.ik_retargeting import ORCA_JOINT_NAMES as handpose_joints + + # Expected joints (from tactile_dex expectations) + expected_joints = [ + "right_thumb_abd", "right_thumb_mcp", "right_thumb_pip", "right_thumb_dip", + "right_index_abd", "right_index_mcp", "right_index_pip", + "right_middle_abd", "right_middle_mcp", "right_middle_pip", + "right_ring_abd", "right_ring_mcp", "right_ring_pip", + "right_pinky_abd", "right_pinky_mcp", "right_pinky_pip", + ] + + if list(handpose_joints) == expected_joints: + print("✓ Joint names match exactly") + else: + print("⚠ Joint name mismatch:") + print(f" HandPose: {list(handpose_joints)}") + print(f" Expected: {expected_joints}") + +except Exception as e: + print(f"✗ Error: {e}") + +# Test 2: Verify REF_OFFSETS keys match joint names +print("\n2. Testing REF_OFFSETS consistency...") +try: + # Read from tactile_dex script + tactile_dex_path = Path(__file__).parent.parent / "tactile_dex" / "orca_tomato_replay.py" + with open(tactile_dex_path, "r") as f: + code = f.read() + + # Extract REF_OFFSETS (simple parsing) + import re + offsets_match = re.search(r'REF_OFFSETS\s*=\s*\{([^}]+)\}', code, re.DOTALL) + if offsets_match: + offsets_str = offsets_match.group(1) + offset_joints = re.findall(r'"([^"]+)"', offsets_str) + print(f"✓ Found REF_OFFSETS for joints: {offset_joints}") + + # Check if all offset joints are in ORCA_JOINT_NAMES + missing = set(offset_joints) - set(handpose_joints) + if missing: + print(f"⚠ Joints in REF_OFFSETS but not in ORCA_JOINT_NAMES: {missing}") + else: + print("✓ All REF_OFFSETS joints are in ORCA_JOINT_NAMES") + else: + print("✗ Could not find REF_OFFSETS in script") + +except Exception as e: + print(f"✗ Error: {e}") + +# Test 3: Verify topic names +print("\n3. Testing ROS2 topic names...") +try: + publisher_path = Path(__file__).parent / "examples" / "handpose_ros2_publisher.py" + with open(publisher_path, "r") as f: + pub_code = f.read() + + # Check publisher topic + if '"/joint_states"' in pub_code or "'/joint_states'" in pub_code: + print("✓ Publisher uses /joint_states") + else: + print("✗ Publisher topic not found") + + # Check subscriber topic in tactile_dex + if 'joint_topic' in code and 'default="/joint_states"' in code: + print("✓ Subscriber defaults to /joint_states") + else: + print("⚠ Subscriber topic configuration may differ") + +except Exception as e: + print(f"✗ Error: {e}") + +# Test 4: Verify no double offset application +print("\n4. Testing offset application logic...") +try: + # Check publisher doesn't apply offsets + if "REF_OFFSETS" not in pub_code: + print("✓ Publisher does NOT apply REF_OFFSETS (correct)") + else: + print("⚠ Publisher may apply REF_OFFSETS (should be in tactile_dex only)") + + # Check tactile_dex applies offsets + if "REF_OFFSETS.get" in code and "val = val + REF_OFFSETS.get" in code: + print("✓ tactile_dex applies REF_OFFSETS (correct)") + else: + print("✗ tactile_dex may not apply REF_OFFSETS") + +except Exception as e: + print(f"✗ Error: {e}") + +# Test 5: Verify timestamp usage +print("\n5. Testing timestamp usage...") +try: + # Check publisher uses ROS time + if "get_clock().now().to_msg()" in pub_code: + print("✓ Publisher uses ROS system time") + else: + print("⚠ Publisher may not use ROS time") + + # Check tactile_dex uses ROS time + if "get_clock().now().to_msg()" in code: + print("✓ tactile_dex uses ROS system time") + else: + print("⚠ tactile_dex may not use ROS time") + +except Exception as e: + print(f"✗ Error: {e}") + +# Test 6: Verify model path handling +print("\n6. Testing model path handling...") +try: + # Check publisher handles relative paths + if "Path(__file__).parent.parent" in pub_code or "Path(model_path)" in pub_code: + print("✓ Publisher handles relative model paths") + else: + print("⚠ Publisher model path handling unclear") + +except Exception as e: + print(f"✗ Error: {e}") + +print("\n" + "="*60) +print("Integration point tests complete!") +print("="*60) + diff --git a/scripts/test_publisher_structure.py b/scripts/test_publisher_structure.py new file mode 100644 index 0000000..7730015 --- /dev/null +++ b/scripts/test_publisher_structure.py @@ -0,0 +1,111 @@ +"""Test script to verify handpose_ros2_publisher.py structure without ROS2.""" + +import sys +from pathlib import Path + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent)) + +# Test imports that don't require ROS2 +print("Testing imports...") +try: + import cv2 + print("✓ cv2") +except ImportError as e: + print(f"✗ cv2: {e}") + +try: + import numpy as np + print("✓ numpy") +except ImportError as e: + print(f"✗ numpy: {e}") + +try: + import mujoco + print("✓ mujoco") +except ImportError as e: + print(f"✗ mujoco: {e}") + +try: + from handpose.ik_retargeting import ORCAHandIKRetargeting, ORCAHandIKConfig, ORCA_JOINT_NAMES + print(f"✓ handpose.ik_retargeting ({len(ORCA_JOINT_NAMES)} joints)") +except ImportError as e: + print(f"✗ handpose.ik_retargeting: {e}") + +try: + from handpose.tracker.hamer import HaMeRTracker + print("✓ handpose.tracker.hamer") +except ImportError as e: + print(f"✗ handpose.tracker.hamer: {e}") + +# Test ROS2 imports (will fail, but that's expected) +print("\nTesting ROS2 imports (expected to fail on macOS)...") +try: + import rclpy + from rclpy.node import Node + from rclpy.qos import qos_profile_sensor_data + from sensor_msgs.msg import JointState + print("✓ ROS2 packages available!") +except ImportError as e: + print(f"✗ ROS2 not available: {e}") + print(" (This is expected if ROS2 is not installed)") + +# Test that the script can be parsed +print("\nTesting script structure...") +try: + with open("examples/handpose_ros2_publisher.py", "r") as f: + code = f.read() + + # Check for key components + checks = [ + ("HandPoseROS2Publisher class", "class HandPoseROS2Publisher"), + ("HaMeRTracker usage", "HaMeRTracker"), + ("ORCAHandIKRetargeting usage", "ORCAHandIKRetargeting"), + ("JointState publishing", "JointState"), + ("ORCA_JOINT_NAMES usage", "ORCA_JOINT_NAMES"), + ("Camera capture", "cv2.VideoCapture"), + ("ROS2 timer", "create_timer"), + ("Command line args", "argparse"), + ("Main function", "def main()"), + ] + + for name, pattern in checks: + if pattern in code: + print(f"✓ {name}") + else: + print(f"✗ {name} (missing: {pattern})") + +except Exception as e: + print(f"✗ Error reading script: {e}") + +# Test ORCA_JOINT_NAMES structure +print("\nTesting ORCA joint names...") +try: + from handpose.ik_retargeting import ORCA_JOINT_NAMES + print(f"Total joints: {len(ORCA_JOINT_NAMES)}") + print("Joint names:") + for i, name in enumerate(ORCA_JOINT_NAMES, 1): + print(f" {i:2d}. {name}") + + # Verify expected joints + expected_joints = [ + "right_thumb_abd", "right_thumb_mcp", "right_thumb_pip", "right_thumb_dip", + "right_index_abd", "right_index_mcp", "right_index_pip", + "right_middle_abd", "right_middle_mcp", "right_middle_pip", + "right_ring_abd", "right_ring_mcp", "right_ring_pip", + "right_pinky_abd", "right_pinky_mcp", "right_pinky_pip", + ] + + missing = set(expected_joints) - set(ORCA_JOINT_NAMES) + if missing: + print(f"\n⚠ Missing expected joints: {missing}") + else: + print("\n✓ All expected joints present") + +except Exception as e: + print(f"✗ Error: {e}") + +print("\n" + "="*60) +print("Structure test complete!") +print("="*60) + From 9e375ae9b1b1410680a82224f15d741a552dc85e Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 19 Jan 2026 13:03:12 -0800 Subject: [PATCH 02/17] undo typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c234d74..5178395 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -w# HandPose +# HandPose Real-time hand tracking and retargeting to ORCA robot hand using MediaPipe and MuJoCo. From 5d9def539cfcb9d47959b3475b80374d95ec9d76 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 20 Jan 2026 12:31:42 -0500 Subject: [PATCH 03/17] imports --- examples/handpose_ros2_publisher.py | 3 +-- examples/live_demo_ik_dual_camera.py | 4 ++-- handpose/tracker/hamer.py | 13 +++++-------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/examples/handpose_ros2_publisher.py b/examples/handpose_ros2_publisher.py index 5e6dabe..81772f0 100644 --- a/examples/handpose_ros2_publisher.py +++ b/examples/handpose_ros2_publisher.py @@ -11,6 +11,7 @@ import argparse import sys import time +import traceback from pathlib import Path import cv2 @@ -268,8 +269,6 @@ def main() -> None: print("\nShutting down...") except Exception as e: print(f"Error: {e}") - import traceback - traceback.print_exc() finally: rclpy.shutdown() diff --git a/examples/live_demo_ik_dual_camera.py b/examples/live_demo_ik_dual_camera.py index 4372157..1722c4d 100644 --- a/examples/live_demo_ik_dual_camera.py +++ b/examples/live_demo_ik_dual_camera.py @@ -22,6 +22,7 @@ from handpose import ORCAHandIKRetargeting from handpose.ik_retargeting import FINGER_TARGET_BODIES, MP_LANDMARK_INDICES, ORCAHandIKConfig from handpose.tracker import BaseHandTracker, HandStructure +from handpose.tracker.base import FingerJoints from handpose.tracker.hamer import HaMeRTracker from handpose.tracker.mediapipe import MediaPipeTracker @@ -149,8 +150,7 @@ def merge_hand_poses( ref_handedness = struct2.handedness # Reconstruct HandStructure from averaged landmarks - # This is a simplified merge - in practice you might want more sophisticated merging - from handpose.tracker.base import FingerJoints + # TODO: implement better merging strategy # Extract finger joints from averaged landmarks thumb = FingerJoints( diff --git a/handpose/tracker/hamer.py b/handpose/tracker/hamer.py index 96216c8..76b4c78 100644 --- a/handpose/tracker/hamer.py +++ b/handpose/tracker/hamer.py @@ -4,9 +4,14 @@ from typing import Any import cv2 +import hamer.models.hamer as hamer_hamer import numpy as np import torch +from hamer.configs import CACHE_DIR_HAMER +from hamer.models import download_models, load_hamer +# IMPORTANT: pyrender OffscreenRenderer wants this key to be ABSENT +# os.environ.pop("PYOPENGL_PLATFORM", None) from .base import EPS, BaseHandTracker, FingerJoints, Handedness, HandStructure @@ -35,14 +40,6 @@ def __init__( self.device = torch.device(device) self.conf_threshold = conf_threshold - # IMPORTANT: pyrender OffscreenRenderer wants this key to be ABSENT - # os.environ.pop("PYOPENGL_PLATFORM", None) - - # Import *after* popping env var - import hamer.models.hamer as hamer_hamer - from hamer.configs import CACHE_DIR_HAMER - from hamer.models import download_models, load_hamer - class _NoopMeshRenderer: def __init__(self, *args: object, **kwargs: object) -> None: pass From 5fd99fd501c391b63a8c71043f266d8b405cea16 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 20 Jan 2026 17:58:32 -0500 Subject: [PATCH 04/17] draft offline retargeting --- examples/offline_retargeting.py | 371 ++++++++++++++++++++++++++++++++ pyproject.toml | 1 + 2 files changed, 372 insertions(+) create mode 100644 examples/offline_retargeting.py diff --git a/examples/offline_retargeting.py b/examples/offline_retargeting.py new file mode 100644 index 0000000..f37b14e --- /dev/null +++ b/examples/offline_retargeting.py @@ -0,0 +1,371 @@ +"""Offline IK Retargeting Demo using Mink (recorded to hdf5 file).""" + +import argparse +import asyncio +import time +import xml.etree.ElementTree as ET +from pathlib import Path + +import cv2 +import h5py +import mujoco +import numpy as np +from askin import KeyboardController + +from handpose import ORCAHandIKRetargeting +from handpose.ik_retargeting import ORCA_JOINT_NAMES, ORCAHandIKConfig +from handpose.tracker import BaseHandTracker +from handpose.tracker.hamer import HaMeRTracker +from handpose.tracker.mediapipe import MediaPipeTracker + + +def load_mjcf_model(mjcf_path: Path) -> str: + """Load MJCF model and convert relative paths to absolute paths.""" + tree = ET.parse(mjcf_path) + root = tree.getroot() + + # Convert relative paths to absolute paths + # MuJoCo can't resolve relative paths when loading from string + model_dir = mjcf_path.parent + for asset in root.findall(".//asset"): + for mesh in asset.findall("mesh"): + file_attr = mesh.get("file") + if file_attr and not Path(file_attr).is_absolute(): + # Convert relative path to absolute + abs_path = (model_dir / file_attr).resolve() + mesh.set("file", str(abs_path)) + + return ET.tostring(root, encoding="unicode") + + +def get_joint_names_from_model(model: mujoco.MjModel) -> list[str]: + """Extract joint names from MuJoCo model.""" + joint_names = [] + for i in range(model.njnt): + # Get joint name by index + name_start = model.name_jointadr[i] + if name_start >= 0: + name_bytes = model.names[name_start:].split(b"\x00")[0] + joint_name = name_bytes.decode("utf-8") + joint_names.append(joint_name) + elif i < len(ORCA_JOINT_NAMES): + # Fallback: use ORCA joint names if available + joint_names.append(ORCA_JOINT_NAMES[i]) + else: + joint_names.append(f"joint_{i}") + return joint_names + + +async def main_loop( + model: mujoco.MjModel, + data: mujoco.MjData, + cap: cv2.VideoCapture, + tracker: BaseHandTracker, + ik_solver: ORCAHandIKRetargeting, + output_path: Path, + joint_names: list[str], + target_fps: float = 30.0, + show_preview: bool = True, +) -> None: + """Main loop: capture video, run IK, save joint states to HDF5. + + Args: + model: The MuJoCo model. + data: The MuJoCo data. + cap: The video capture. + tracker: The hand tracker. + ik_solver: The IK solver. + output_path: The path to the output HDF5 file. + joint_names: The names of the joints. + target_fps: The target frequency in Hz. + show_preview: Whether to show the preview window. + """ + # Initialize data storage + frame_count = 0 + start_time = time.time() + target_dt = 1.0 / target_fps if target_fps > 0 else 0.0 + + # Frequency monitoring + fps_check_interval = 30 # Check FPS every N frames + frame_times: list[float] = [] + last_fps_check_frame = 0 + + running = True + + async def key_handler(key: str) -> None: + nonlocal running + if key == "q": + running = False + print("[Main] Quitting...") + + # Initialize keyboard controller + controller = KeyboardController(key_handler=key_handler, timeout=0.01) + await controller.start() + + print(f"Recording to {output_path}") + print(f"Target frequency: {target_fps:.1f} Hz") + print("Press 'q' to quit and save.") + + try: + with h5py.File(output_path, "w") as h5_file: + # Create datasets (we'll resize as we go) + max_frames = 10000 # Initial estimate, will resize if needed + qpos_dset = h5_file.create_dataset( + "qpos", + shape=(max_frames, model.nq), + maxshape=(None, model.nq), + dtype=np.float32, + chunks=(100, model.nq), + ) + timestamp_dset = h5_file.create_dataset( + "timestamps", + shape=(max_frames,), + maxshape=(None,), + dtype=np.float64, + chunks=(100,), + ) + + # Store metadata + h5_file.attrs["joint_names"] = [name.encode("utf-8") for name in joint_names] + h5_file.attrs["n_joints"] = model.nq + h5_file.attrs["model_path"] = str(output_path) + h5_file.attrs["created_at"] = time.strftime("%Y-%m-%d %H:%M:%S") + h5_file.attrs["target_fps"] = target_fps + + last_frame_time = time.time() + + while running and cap.isOpened(): + loop_start_time = time.time() + + ret, frame = cap.read() + if not ret: + break + + timestamp = time.time() - start_time + hand_structures = tracker.detect_hands(frame, timestamp=timestamp) + + if hand_structures: + structure = hand_structures[0] + + # --- IK SOLVE --- + # 1. Update the configuration object with current robot state + mujoco.mj_forward(model, data) + ik_solver.configuration.update(data.qpos) + + # 2. Solve for new qpos + target_q = ik_solver.solve(structure) + + # 3. Save valid joint states (with NaN guard) + if not np.any(np.isnan(target_q)) and not np.any(np.isinf(target_q)): + # Resize datasets if needed + if frame_count >= max_frames: + max_frames = int(max_frames * 1.5) + qpos_dset.resize((max_frames, model.nq)) + timestamp_dset.resize((max_frames,)) + + # Store data + qpos_dset[frame_count] = target_q.astype(np.float32) + timestamp_dset[frame_count] = timestamp + frame_count += 1 + else: + print(f"Warning: IK produced NaNs on frame {frame_count}. Skipping.") + + # Optional preview window + if show_preview: + frame = tracker.visualize(frame, hand_structures) + # Calculate current FPS for display (use time since last frame) + frame_dt = time.time() - last_frame_time + current_fps = 1.0 / frame_dt if frame_dt > 0 else 0.0 + info_text = f"Frames: {frame_count} | Hands: {len(hand_structures)} | FPS: {current_fps:.1f}" + cv2.putText( + frame, info_text, (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2 + ) + cv2.imshow("Hand Tracking (Press 'q' to quit)", frame) + # Use non-blocking waitKey to avoid stalls + cv2.waitKey(1) + + # Frequency control: maintain target FPS + loop_elapsed = time.time() - loop_start_time + if target_dt > 0 and loop_elapsed < target_dt: + time.sleep(target_dt - loop_elapsed) + + # Track frame times for frequency monitoring (time between frames) + current_time = time.time() + if frame_count > 0: + frame_dt = current_time - last_frame_time + frame_times.append(frame_dt) + + last_frame_time = current_time + + # Check actual frequency periodically and warn if too low + if frame_count - last_fps_check_frame >= fps_check_interval: + if len(frame_times) >= fps_check_interval: + # Calculate average FPS over the last interval + avg_frame_time = sum(frame_times[-fps_check_interval:]) / fps_check_interval + actual_fps = 1.0 / avg_frame_time if avg_frame_time > 0 else 0.0 + + if actual_fps < target_fps * 0.9: # Warn if actual is < 90% of target + print( + f"\n[WARNING] Actual frequency ({actual_fps:.1f} Hz) is lower than " + f"target ({target_fps:.1f} Hz). Consider reducing target frequency." + ) + + last_fps_check_frame = frame_count + + # Keep only recent frame times to avoid memory growth + if len(frame_times) > fps_check_interval * 2: + frame_times = frame_times[-fps_check_interval:] + + # Small async sleep to allow keyboard events to be processed + await asyncio.sleep(0.001) + + # Resize datasets to actual size + if frame_count > 0: + qpos_dset.resize((frame_count, model.nq)) + timestamp_dset.resize((frame_count,)) + + # Calculate final statistics + total_time = time.time() - start_time + actual_fps = frame_count / total_time if total_time > 0 else 0.0 + + # Store actual FPS in metadata + h5_file.attrs["actual_fps"] = actual_fps + h5_file.attrs["total_frames"] = frame_count + h5_file.attrs["total_time"] = total_time + + print(f"\nSaved {frame_count} frames to {output_path}") + print(f"Actual frequency: {actual_fps:.2f} Hz (target: {target_fps:.1f} Hz)") + + if actual_fps < target_fps * 0.9: + print( + f"[WARNING] Final frequency ({actual_fps:.2f} Hz) was significantly lower " + f"than target ({target_fps:.1f} Hz). Consider reducing --fps for future recordings." + ) + else: + print("Warning: No valid frames recorded!") + + except KeyboardInterrupt: + print("\nInterrupted by user") + finally: + if show_preview: + cv2.destroyAllWindows() + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--camera", type=int, default=0) + parser.add_argument("--model", type=str, default="orca_hand.mjcf") + parser.add_argument("--scale", type=float, default=1.3, help="Robot/Human hand scale factor") + parser.add_argument( + "--targets", + type=str, + default="tip, ip", + help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip.", + ) + parser.add_argument( + "--tracker", + type=str, + choices=["hamer", "mp"], + default="hamer", + help="Hand tracker to use: 'hamer' for HaMeR or 'mp' for MediaPipe (default: hamer)", + ) + parser.add_argument( + "--output", + type=str, + default="retargeted_joints.hdf5", + help="Output HDF5 file path for saving joint states (default: retargeted_joints.hdf5)", + ) + parser.add_argument( + "--no-preview", + action="store_true", + help="Disable OpenCV preview window", + ) + parser.add_argument( + "--fps", + type=float, + default=30.0, + help=( + "Target frequency in Hz for recording (default: 30.0). " + "Warning will be shown if actual frequency is lower." + ), + ) + args = parser.parse_args() + + raw_targets = [part.strip().lower() for part in args.targets.split(",")] + target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt)) + if not target_joints: + target_joints = ("tip",) + supported_targets = {"tip", "ip", "pip", "mcp"} + invalid = [jt for jt in target_joints if jt not in supported_targets] + if invalid: + parser.error(f"Unsupported target joint types: {', '.join(invalid)}") + + # 1. Setup paths and model + script_dir = Path(__file__).parent + model_path = script_dir.parent / "models" / args.model if not Path(args.model).is_absolute() else Path(args.model) + + if not model_path.exists(): + print(f"Error: Model not found at {model_path}") + return + + # 2. Load MuJoCo Model + print("Loading MuJoCo model...") + xml_string = load_mjcf_model(model_path) + model = mujoco.MjModel.from_xml_string(xml_string) + data = mujoco.MjData(model) + + # Get joint names from model + joint_names = get_joint_names_from_model(model) + print(f"Found {len(joint_names)} joints in model") + + print(f"Initializing Hand Tracker ({args.tracker})...") + tracker: BaseHandTracker + if args.tracker == "hamer": + tracker = HaMeRTracker(smoothing_factor=0.0, conf_threshold=0.3) + else: + tracker = MediaPipeTracker(min_detection_confidence=0.6, min_tracking_confidence=0.6) + + print("Initializing IK Solver (Mink)...") + + ik_config = ORCAHandIKConfig( + scale_factor=args.scale, + wrist_offset_palm=np.array([0.000, 0.0, -0.05]), + target_joint_types=target_joints, + ) + + ik_solver = ORCAHandIKRetargeting(model, config=ik_config) + + # 5. Setup camera + cap = cv2.VideoCapture(args.camera) + cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) + + # 6. Setup output path + output_path = Path(args.output) + if not output_path.is_absolute(): + output_path = Path.cwd() / output_path + + # 7. Run main loop + asyncio.run( + main_loop( + model, + data, + cap, + tracker, + ik_solver, + output_path, + joint_names, + target_fps=args.fps, + show_preview=not args.no_preview, + ) + ) + + cap.release() + + +""" +Run with: + python examples/offline_retargeting.py --model orca_hand_fixed.mjcf --scale 1.0 --output my_joints.hdf5 --fps 30.0 +""" +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 2dbfbea..a4c5c0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,6 +85,7 @@ module = [ 'sensor_msgs', 'rclpy.*', 'sensor_msgs.*', + 'h5py', ] ignore_missing_imports = true From 8dd8c023f4548c21fc72bebd6468dc39c1ed4d56 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Wed, 21 Jan 2026 13:30:44 -0500 Subject: [PATCH 05/17] switch to logger --- examples/offline_retargeting.py | 50 ++++++++++++++++++++------------- handpose/requirements.txt | 1 + 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/examples/offline_retargeting.py b/examples/offline_retargeting.py index f37b14e..3be63c9 100644 --- a/examples/offline_retargeting.py +++ b/examples/offline_retargeting.py @@ -2,10 +2,12 @@ import argparse import asyncio +import logging import time import xml.etree.ElementTree as ET from pathlib import Path +import colorlogging import cv2 import h5py import mujoco @@ -18,6 +20,8 @@ from handpose.tracker.hamer import HaMeRTracker from handpose.tracker.mediapipe import MediaPipeTracker +logger = logging.getLogger(__name__) + def load_mjcf_model(mjcf_path: Path) -> str: """Load MJCF model and convert relative paths to absolute paths.""" @@ -96,15 +100,15 @@ async def key_handler(key: str) -> None: nonlocal running if key == "q": running = False - print("[Main] Quitting...") + logger.info("Quitting...") # Initialize keyboard controller controller = KeyboardController(key_handler=key_handler, timeout=0.01) await controller.start() - print(f"Recording to {output_path}") - print(f"Target frequency: {target_fps:.1f} Hz") - print("Press 'q' to quit and save.") + logger.info("Recording to %s", output_path) + logger.info("Target frequency: %f Hz", target_fps) + logger.info("Press 'q' to quit and save.") try: with h5py.File(output_path, "w") as h5_file: @@ -168,7 +172,7 @@ async def key_handler(key: str) -> None: timestamp_dset[frame_count] = timestamp frame_count += 1 else: - print(f"Warning: IK produced NaNs on frame {frame_count}. Skipping.") + logger.warning("IK produced NaNs on frame %d. Skipping.", frame_count) # Optional preview window if show_preview: @@ -204,10 +208,11 @@ async def key_handler(key: str) -> None: avg_frame_time = sum(frame_times[-fps_check_interval:]) / fps_check_interval actual_fps = 1.0 / avg_frame_time if avg_frame_time > 0 else 0.0 - if actual_fps < target_fps * 0.9: # Warn if actual is < 90% of target - print( - f"\n[WARNING] Actual frequency ({actual_fps:.1f} Hz) is lower than " - f"target ({target_fps:.1f} Hz). Consider reducing target frequency." + if actual_fps < target_fps: + logger.warning( + "Actual frequency (%f Hz) is lower than target (%f Hz).", + actual_fps, + target_fps, ) last_fps_check_frame = frame_count @@ -233,19 +238,21 @@ async def key_handler(key: str) -> None: h5_file.attrs["total_frames"] = frame_count h5_file.attrs["total_time"] = total_time - print(f"\nSaved {frame_count} frames to {output_path}") - print(f"Actual frequency: {actual_fps:.2f} Hz (target: {target_fps:.1f} Hz)") + logger.info("Saved %d frames to %s", frame_count, output_path) + logger.info("Actual frequency: %f Hz (target: %f Hz)", actual_fps, target_fps) - if actual_fps < target_fps * 0.9: - print( - f"[WARNING] Final frequency ({actual_fps:.2f} Hz) was significantly lower " - f"than target ({target_fps:.1f} Hz). Consider reducing --fps for future recordings." + if actual_fps < target_fps: + logger.warning( + "Final frequency (%f Hz) was lower than target (%f Hz). \ + Consider reducing --fps for future recordings.", + actual_fps, + target_fps, ) else: - print("Warning: No valid frames recorded!") + logger.warning("No valid frames recorded!") except KeyboardInterrupt: - print("\nInterrupted by user") + logger.info("Interrupted by user") finally: if show_preview: cv2.destroyAllWindows() @@ -256,6 +263,7 @@ def main() -> None: parser.add_argument("--camera", type=int, default=0) parser.add_argument("--model", type=str, default="orca_hand.mjcf") parser.add_argument("--scale", type=float, default=1.3, help="Robot/Human hand scale factor") + parser.add_argument("--debug", action="store_true", help="Enable debug mode") parser.add_argument( "--targets", type=str, @@ -285,12 +293,14 @@ def main() -> None: type=float, default=30.0, help=( - "Target frequency in Hz for recording (default: 30.0). " - "Warning will be shown if actual frequency is lower." + "Target frequency in Hz for recording (default: 30.0). Warning will be shown if actual frequency is lower." ), ) args = parser.parse_args() + level = logging.DEBUG if args.debug else logging.INFO + colorlogging.configure(level=level) + raw_targets = [part.strip().lower() for part in args.targets.split(",")] target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt)) if not target_joints: @@ -305,7 +315,7 @@ def main() -> None: model_path = script_dir.parent / "models" / args.model if not Path(args.model).is_absolute() else Path(args.model) if not model_path.exists(): - print(f"Error: Model not found at {model_path}") + logger.error("Model not found at %s", model_path) return # 2. Load MuJoCo Model diff --git a/handpose/requirements.txt b/handpose/requirements.txt index 82f1b3a..eb61ccb 100644 --- a/handpose/requirements.txt +++ b/handpose/requirements.txt @@ -5,3 +5,4 @@ scipy>=1.7.0 mujoco>=3.0.0 askin>=0.1.0 mink +colorlogging From ef4560d8086818e926f4fe7596c8994a831ce109 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sat, 24 Jan 2026 16:10:46 -0500 Subject: [PATCH 06/17] install make target --- Makefile | 4 ++++ README.md | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/Makefile b/Makefile index d23a294..25c2722 100644 --- a/Makefile +++ b/Makefile @@ -65,3 +65,7 @@ install-detectron2: install-hamer: install-build-deps install-xtcocotools install-detectron2 @echo "Installing HaMeR..." cd third_party/hamer && $(PIP) install . --no-build-isolation + +# Install all dependencies +install-deps: install-build-deps install-xtcocotools install-detectron2 install-hamer + diff --git a/README.md b/README.md index 5178395..e7f5c7b 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,11 @@ Or manually: pip install -e ".[dev]" ``` +To install the HaMeR dependencies, run: +```bash +make install-deps +``` + ## Running the Demos ### IK Solution From 3e56e214c12823acf162c0bb1e7b762d6c8c2f7c Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sat, 24 Jan 2026 16:11:39 -0500 Subject: [PATCH 07/17] deps --- handpose/requirements.txt | 4 ++-- pyproject.toml | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/handpose/requirements.txt b/handpose/requirements.txt index eb61ccb..005e5d1 100644 --- a/handpose/requirements.txt +++ b/handpose/requirements.txt @@ -1,5 +1,5 @@ -numpy>=1.21.0 -opencv-python>=4.5.0 +numpy>=1.21.0,<2.0 +opencv-python>=4.5.0,<4.12 mediapipe>=0.10.0 scipy>=1.7.0 mujoco>=3.0.0 diff --git a/pyproject.toml b/pyproject.toml index a4c5c0a..d2581e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,8 @@ readme = "README.md" license = {text = "MIT"} requires-python = ">=3.11" dependencies = [ - "numpy>=1.21.0", - "opencv-python>=4.5.0", + "numpy>=1.21.0,<2.0", + "opencv-python>=4.5.0,<4.12", "mediapipe>=0.10.0", "scipy>=1.7.0", "mujoco>=3.0.0", @@ -27,6 +27,13 @@ dev = [ "ruff", "types-pyyaml", ] +ros = [ + # Note: ROS2 packages (rclpy, sensor-msgs) cannot be installed via pip. + # They must be installed via conda from robostack channel: + # conda install -c robostack-staging -c conda-forge ros-humble-rclpy ros-humble-sensor-msgs + # Or use system ROS2 installation with proper Python version compatibility. + # See handpose/requirements-ros.txt for installation instructions. +] [build-system] requires = ["setuptools>=45", "wheel"] From 794ffe595856a2cdce9b59f2aee147111daa177e Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sat, 24 Jan 2026 16:11:53 -0500 Subject: [PATCH 08/17] add smoothing --- examples/live_demo_ik.py | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/examples/live_demo_ik.py b/examples/live_demo_ik.py index d5715fa..41a244e 100644 --- a/examples/live_demo_ik.py +++ b/examples/live_demo_ik.py @@ -252,6 +252,7 @@ async def main_async( display_flag: Flag | None, label_lookup: dict[int, list[str]], camera_matrix: np.ndarray | None = None, + joint_smoothing: float = 1.0, ) -> None: """Async main loop with keyboard handling.""" running = True @@ -274,6 +275,9 @@ async def key_handler(key: str) -> None: fps_start_time = time.time() fps: float = 0.0 + # Initialize joint position smoothing state + smoothed_qpos = data.qpos.copy() + with mujoco.viewer.launch_passive(model, data) as viewer: while running and viewer.is_running() and cap.isOpened(): ret, frame = cap.read() @@ -298,7 +302,13 @@ async def key_handler(key: str) -> None: # 2. Solve for new qpos target_q = ik_solver.solve(structure) - # 3. Apply to simulation (with NaN guard) + # 3. Apply joint position smoothing (always applied; joint_smoothing=1.0 means no smoothing) + if not np.any(np.isnan(target_q)) and not np.any(np.isinf(target_q)): + # Exponential moving average: smoothed = joint_smoothing * new + (1 - joint_smoothing) * old + smoothed_qpos = joint_smoothing * target_q + (1.0 - joint_smoothing) * smoothed_qpos + target_q = smoothed_qpos + + # 4. Apply to simulation (with NaN guard) if not np.any(np.isnan(target_q)) and not np.any(np.isinf(target_q)): data.qpos[:] = target_q data.qvel[:] = 0 @@ -370,8 +380,8 @@ async def key_handler(key: str) -> None: def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--camera", type=int, default=0) - parser.add_argument("--model", type=str, default="orca_hand.mjcf") - parser.add_argument("--scale", type=float, default=1.3, help="Robot/Human hand scale factor") + parser.add_argument("--model", type=str, default="orca_hand_fixed.mjcf") + parser.add_argument("--scale", type=float, default=1.1, help="Robot/Human hand scale factor") parser.add_argument( "--dual", action="store_true", @@ -387,7 +397,7 @@ def main() -> None: "--targets", type=str, default="tip, ip", - help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip.", + help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip", ) parser.add_argument( "--tracker", @@ -396,6 +406,18 @@ def main() -> None: default="mp", help="Hand tracker to use: 'mp' for MediaPipe or 'hamer' for HaMeR (default: mp)", ) + parser.add_argument( + "--smoothing", + type=float, + default=0.0, + help="Exponential smoothing factor for HaMeR tracker (0.0 = no smoothing)", + ) + parser.add_argument( + "--joint-smoothing", + type=float, + default=1.0, + help="Smoothing factor for joint positions (0.0-1.0). Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", + ) args = parser.parse_args() raw_targets = [part.strip().lower() for part in args.targets.split(",")] @@ -426,7 +448,7 @@ def main() -> None: print(f"Initializing Hand Tracker ({args.tracker})...") tracker: BaseHandTracker if args.tracker == "hamer": - tracker = HaMeRTracker(smoothing_factor=0.0, conf_threshold=0.3) + tracker = HaMeRTracker(smoothing_factor=args.smoothing, conf_threshold=0.3) else: tracker = MediaPipeTracker(min_detection_confidence=0.6, min_tracking_confidence=0.6) @@ -478,6 +500,10 @@ def main() -> None: print("\nStarting simulation...") print("Press 'q' to quit (or close viewer).") + # Validate joint smoothing value + if not (0.0 < args.joint_smoothing <= 1.0): + parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)") + # Run async main loop asyncio.run( main_async( @@ -491,6 +517,7 @@ def main() -> None: display_flag, mp_label_lookup, camera_matrix, + joint_smoothing=args.joint_smoothing, ) ) From 3b8dd069da1a17351528553f1a423edc89ba9fb7 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sat, 24 Jan 2026 16:12:13 -0500 Subject: [PATCH 09/17] fix ik for ros publisher --- examples/handpose_ros2_publisher.py | 127 +++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 4 deletions(-) diff --git a/examples/handpose_ros2_publisher.py b/examples/handpose_ros2_publisher.py index 81772f0..13814f2 100644 --- a/examples/handpose_ros2_publisher.py +++ b/examples/handpose_ros2_publisher.py @@ -12,8 +12,16 @@ import sys import time import traceback +import xml.etree.ElementTree as ET from pathlib import Path +# Remove system ROS2 paths from sys.path to prefer conda-installed ROS2 packages +# This prevents conflicts when system ROS2 (Python 3.10) is incompatible with conda env (Python 3.11) +sys.path = [ + p for p in sys.path + if not (p.startswith("/opt/ros/") and "python3.10" in p) +] + import cv2 import mujoco import numpy as np @@ -25,10 +33,63 @@ # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -from handpose.ik_retargeting import ORCA_JOINT_NAMES, ORCAHandIKConfig, ORCAHandIKRetargeting +from handpose.ik_retargeting import ( + FINGER_TARGET_BODIES, + ORCA_JOINT_NAMES, + ORCAHandIKConfig, + ORCAHandIKRetargeting, +) from handpose.tracker.hamer import HaMeRTracker +def inject_target_bodies(mjcf_path: Path) -> str: + """Injects mocap bodies and tip sites into the MJCF XML string for IK targeting. + + This matches the approach in live_demo_ik.py to ensure tip sites exist for all fingers. + """ + tree = ET.parse(mjcf_path) + root = tree.getroot() + worldbody = root.find("worldbody") + + if worldbody is None: + raise ValueError("Could not find worldbody in MJCF") + + # Convert relative paths to absolute paths + # MuJoCo can't resolve relative paths when loading from string + model_dir = mjcf_path.parent + for asset in root.findall(".//asset"): + for mesh in asset.findall("mesh"): + file_attr = mesh.get("file") + if file_attr and not Path(file_attr).is_absolute(): + # Convert relative path to absolute + abs_path = (model_dir / file_attr).resolve() + mesh.set("file", str(abs_path)) + + # Add tip sites for all fingers (required for tip tracking) + tip_site_specs = { + "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.018])), + "index": ("right_index_ip", np.array([0.0, 0.0, 0.020])), + "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.022])), + "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.021])), + "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.018])), + } + + for finger, (parent_body, offset) in tip_site_specs.items(): + body_elem = root.find(f".//body[@name='{parent_body}']") + if body_elem is None: + continue + site_name = f"right_{finger}_tip_site" + if body_elem.find(f"./site[@name='{site_name}']") is not None: + continue + site = ET.SubElement(body_elem, "site") + site.set("name", site_name) + site.set("pos", " ".join(f"{value:.5f}" for value in offset)) + site.set("size", "0.0025") + site.set("rgba", "1 0.6 0.2 0.8") + + return ET.tostring(root, encoding="unicode") + + class HandPoseROS2Publisher(Node): """ROS2 node that publishes hand joint states from camera tracking.""" @@ -44,6 +105,8 @@ def __init__( hold_last: bool = False, width: int = 1280, height: int = 720, + joint_smoothing: float = 1.0, + target_joint_types: tuple[str, ...] = ("tip", "ip"), ) -> None: """Initialize the HandPose ROS2 publisher. @@ -58,6 +121,8 @@ def __init__( hold_last: If True, publish last valid joint values when hand is not detected width: Camera frame width height: Camera frame height + joint_smoothing: Smoothing factor for joint positions (0.0-1.0). Default: 1.0 (no smoothing) + target_joint_types: Tuple of joint types to target for IK (tip, ip, pip, mcp). Default: ("tip", "ip") """ super().__init__("handpose_hamer_publisher") @@ -87,11 +152,24 @@ def __init__( raise FileNotFoundError(f"Model file not found: {model_file}") self.get_logger().info(f"Loading MuJoCo model from {model_file}") - model = mujoco.MjModel.from_xml_path(str(model_file)) + # Inject tip sites into MJCF (required for tip tracking, matches live_demo_ik.py) + xml_string = inject_target_bodies(model_file) + self.model = mujoco.MjModel.from_xml_string(xml_string) # IK retargeting - ik_cfg = ORCAHandIKConfig(scale_factor=scale) - self.ik = ORCAHandIKRetargeting(model, config=ik_cfg) + # Use the same wrist_offset_palm as live_demo_ik.py to match USD model coordinate frame + # The default MJCF offset [0.002, -0.00144, -0.03872] may not match the USD model + ik_cfg = ORCAHandIKConfig( + scale_factor=scale, + target_joint_types=target_joint_types, + wrist_offset_palm=np.array([0.000, 0.0, -0.05]), # Match live_demo_ik.py + ) + self.ik = ORCAHandIKRetargeting(self.model, config=ik_cfg) + + # Initialize joint position smoothing state and MuJoCo data for forward kinematics + self.data = mujoco.MjData(self.model) + self.smoothed_qpos = self.data.qpos.copy() + self.joint_smoothing = joint_smoothing # State tracking self.last_joint_vals = None @@ -137,7 +215,20 @@ def tick(self) -> None: joint_vals = None if hand is not None: try: + # Update IK solver configuration with current robot state (required for proper IK solving) + # This matches the approach in live_demo_ik.py + self.data.qpos[:] = self.smoothed_qpos + mujoco.mj_forward(self.model, self.data) + self.ik.configuration.update(self.data.qpos) + + # Solve IK qpos = self.ik.solve(hand) + + # Apply joint position smoothing (always applied; joint_smoothing=1.0 means no smoothing) + if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)): + # Exponential moving average: smoothed = joint_smoothing * new + (1 - joint_smoothing) * old + self.smoothed_qpos = self.joint_smoothing * qpos + (1.0 - self.joint_smoothing) * self.smoothed_qpos + qpos = self.smoothed_qpos joint_vals = qpos[self.ik.joint_indices] # Extract only ORCA joints self.last_joint_vals = joint_vals.copy() self.hand_detected_count += 1 @@ -244,9 +335,35 @@ def main() -> None: ) parser.add_argument("--width", type=int, default=1280, help="Camera frame width") parser.add_argument("--height", type=int, default=720, help="Camera frame height") + parser.add_argument( + "--joint-smoothing", + type=float, + default=1.0, + help="Smoothing factor for joint positions (0.0-1.0). Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", + ) + parser.add_argument( + "--targets", + type=str, + default="tip, ip", + help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip", + ) args = parser.parse_args() + # Parse and validate target joint types + raw_targets = [part.strip().lower() for part in args.targets.split(",")] + target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt)) + if not target_joints: + target_joints = ("tip",) + supported_targets = {"tip", "ip", "pip", "mcp"} + invalid = [jt for jt in target_joints if jt not in supported_targets] + if invalid: + parser.error(f"Unsupported target joint types: {', '.join(invalid)}") + + # Validate joint smoothing value + if not (0.0 < args.joint_smoothing <= 1.0): + parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)") + # Initialize ROS2 rclpy.init() @@ -262,6 +379,8 @@ def main() -> None: hold_last=args.hold_last, width=args.width, height=args.height, + joint_smoothing=args.joint_smoothing, + target_joint_types=target_joints, ) rclpy.spin(node) node.destroy_node() From 080120f7d81c5a8442f836142f92cf1208aa1da4 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sat, 24 Jan 2026 16:15:06 -0500 Subject: [PATCH 10/17] fmt --- examples/handpose_ros2_publisher.py | 35 +++++++++++++---------------- examples/live_demo_ik.py | 3 ++- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/examples/handpose_ros2_publisher.py b/examples/handpose_ros2_publisher.py index 13814f2..cd20b30 100644 --- a/examples/handpose_ros2_publisher.py +++ b/examples/handpose_ros2_publisher.py @@ -17,34 +17,30 @@ # Remove system ROS2 paths from sys.path to prefer conda-installed ROS2 packages # This prevents conflicts when system ROS2 (Python 3.10) is incompatible with conda env (Python 3.11) -sys.path = [ - p for p in sys.path - if not (p.startswith("/opt/ros/") and "python3.10" in p) -] - -import cv2 -import mujoco -import numpy as np -import rclpy -from rclpy.node import Node -from rclpy.qos import qos_profile_sensor_data -from sensor_msgs.msg import JointState +sys.path = [p for p in sys.path if not (p.startswith("/opt/ros/") and "python3.10" in p)] + +import cv2 # noqa: E402 +import mujoco # noqa: E402 +import numpy as np # noqa: E402 +import rclpy # noqa: E402 +from rclpy.node import Node # noqa: E402 +from rclpy.qos import qos_profile_sensor_data # noqa: E402 +from sensor_msgs.msg import JointState # noqa: E402 # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -from handpose.ik_retargeting import ( - FINGER_TARGET_BODIES, +from handpose.ik_retargeting import ( # noqa: E402 ORCA_JOINT_NAMES, ORCAHandIKConfig, ORCAHandIKRetargeting, ) -from handpose.tracker.hamer import HaMeRTracker +from handpose.tracker.hamer import HaMeRTracker # noqa: E402 def inject_target_bodies(mjcf_path: Path) -> str: """Injects mocap bodies and tip sites into the MJCF XML string for IK targeting. - + This matches the approach in live_demo_ik.py to ensure tip sites exist for all fingers. """ tree = ET.parse(mjcf_path) @@ -220,10 +216,10 @@ def tick(self) -> None: self.data.qpos[:] = self.smoothed_qpos mujoco.mj_forward(self.model, self.data) self.ik.configuration.update(self.data.qpos) - + # Solve IK qpos = self.ik.solve(hand) - + # Apply joint position smoothing (always applied; joint_smoothing=1.0 means no smoothing) if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)): # Exponential moving average: smoothed = joint_smoothing * new + (1 - joint_smoothing) * old @@ -339,7 +335,8 @@ def main() -> None: "--joint-smoothing", type=float, default=1.0, - help="Smoothing factor for joint positions (0.0-1.0). Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", + help="Smoothing factor for joint positions (0.0-1.0). \ + Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", ) parser.add_argument( "--targets", diff --git a/examples/live_demo_ik.py b/examples/live_demo_ik.py index 41a244e..1804d50 100644 --- a/examples/live_demo_ik.py +++ b/examples/live_demo_ik.py @@ -416,7 +416,8 @@ def main() -> None: "--joint-smoothing", type=float, default=1.0, - help="Smoothing factor for joint positions (0.0-1.0). Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", + help="Smoothing factor for joint positions (0.0-1.0). \ + Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", ) args = parser.parse_args() From 45011439118936a9cff8b40b44ba551d20ca0a88 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Sat, 24 Jan 2026 16:38:12 -0500 Subject: [PATCH 11/17] deps --- Makefile | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 25c2722..53f73aa 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ py-files := $(shell find handpose examples tests -name '*.py') static-checks: @black --diff --check $(py-files) @ruff check $(py-files) + @python -m pip install "numpy>=1.21.0,<2.0" --quiet @mypy --install-types --non-interactive $(py-files) .PHONY: lint diff --git a/pyproject.toml b/pyproject.toml index d2581e4..afe77f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ module = [ 'rclpy.*', 'sensor_msgs.*', 'h5py', + "colorlogging", ] ignore_missing_imports = true From 02740f296ddcf1b60488b7ff3995d12830c125e3 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 3 Feb 2026 04:15:43 -0500 Subject: [PATCH 12/17] aligned ik sites, new ik config --- handpose/ik_retargeting.py | 61 +-- scripts/visualize_tracker_ik_sites.py | 539 ++++++++++++++++++++++++++ 2 files changed, 575 insertions(+), 25 deletions(-) create mode 100644 scripts/visualize_tracker_ik_sites.py diff --git a/handpose/ik_retargeting.py b/handpose/ik_retargeting.py index 16869a2..715cfa7 100644 --- a/handpose/ik_retargeting.py +++ b/handpose/ik_retargeting.py @@ -5,6 +5,7 @@ import mink import mujoco import numpy as np +from mink.limits import CollisionAvoidanceLimit, ConfigurationLimit from handpose.tracker.base import HandStructure @@ -44,22 +45,22 @@ }, "index": { "mcp": ("body", "right_index_mp"), - "pip": ("body", "right_index_pp"), + "pip": ("body", "right_index_ip"), # _ip body contains the PIP joint "tip": ("site", "right_index_tip_site"), }, "middle": { "mcp": ("body", "right_middle_mp"), - "pip": ("body", "right_middle_pp"), + "pip": ("body", "right_middle_ip"), # _ip body contains the PIP joint "tip": ("site", "right_middle_tip_site"), }, "ring": { "mcp": ("body", "right_ring_mp"), - "pip": ("body", "right_ring_pp"), + "pip": ("body", "right_ring_ip"), # _ip body contains the PIP joint "tip": ("site", "right_ring_tip_site"), }, "pinky": { "mcp": ("body", "right_pinky_mp"), - "pip": ("body", "right_pinky_pp"), + "pip": ("body", "right_pinky_ip"), # _ip body contains the PIP joint "tip": ("site", "right_pinky_tip_site"), }, } @@ -106,16 +107,23 @@ class ORCAHandIKConfig: # Default from MJCF: right_wrist joint pos="0.002 -0.00144 -0.03872" wrist_offset_palm: np.ndarray | None = None - # IK solver parameters + # IK solver parameters (matching manus Mink configs) dt: float = 0.05 # Timestep for IK integration (seconds) - damping: float = 1e-2 # Levenberg-Marquardt damping - solver: str = "daqp" # QP solver + damping: float = 1e-5 # Levenberg-Marquardt damping (matching manus: 1e-5) + solver: str = "quadprog" # QP solver (matching manus: "quadprog") ik_iterations: int = 5 # Number of IK passes to perform before returning (for better convergence) - # Task costs - position_cost: float = 3.0 # Cost for position tracking + # Task costs (matching manus Mink configs) + position_cost: float = 1.0 # Cost for position tracking (matching manus: 1.0) orientation_cost: float = 0.0 # Cost for orientation tracking - posture_cost: float = 1e-4 # Cost for posture task (keeps hand near neutral) + lm_damping: float = 1.0 # Levenberg-Marquardt damping for FrameTask (matching manus: 1.0) + + # Collision avoidance (optional) + use_collision_avoidance: bool = False # Enable CollisionAvoidanceLimit + collision_geom_pairs: list[tuple[list[str], list[str]]] | None = None # Geom pairs for collision avoidance + collision_gain: float = 0.85 # Collision avoidance gain (default from Mink) + collision_min_distance: float = 0.005 # Minimum distance between geoms (meters) + collision_detection_distance: float = 0.01 # Distance at which collision avoidance activates (meters) # Coordinate frame transformation # MediaPipe to Robot coordinate mapping: [MP_X, MP_Y, MP_Z] -> [Robot_X, Robot_Y, Robot_Z] @@ -194,7 +202,7 @@ def __init__( frame_type=frame_type, position_cost=self.config.position_cost, orientation_cost=self.config.orientation_cost, - lm_damping=self.config.damping, + lm_damping=self.config.lm_damping, ) finger_tasks[joint_type] = task if finger_tasks: @@ -206,16 +214,17 @@ def __init__( "(tip tracking requires fingertip sites to be injected)." ) - # We also need a posture task to encourage the hand to stay close to a "neutral" pose - # when not reaching for extremes. This prevents weird internal configurations. - self.posture_task = mink.PostureTask( - model, - cost=self.config.posture_cost, - lm_damping=self.config.damping, - ) - - self.target_pose = np.zeros(model.nq) - self.posture_task.set_target(self.target_pose) + # Initialize collision avoidance limits if enabled + self.limits = [ConfigurationLimit(model=model)] + if self.config.use_collision_avoidance and self.config.collision_geom_pairs: + collision_limit = CollisionAvoidanceLimit( + model=model, + geom_pairs=self.config.collision_geom_pairs, + gain=self.config.collision_gain, + minimum_distance_from_collisions=self.config.collision_min_distance, + collision_detection_distance=self.config.collision_detection_distance, + ) + self.limits.append(collision_limit) def _hand_structure_to_landmarks(self, hand_structure: HandStructure) -> np.ndarray: """Convert HandStructure to 21x3 landmarks array (MediaPipe format). @@ -346,8 +355,8 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray: # Perform multiple IK iterations for better convergence for iteration in range(self.config.ik_iterations): - # Build active tasks list (posture task + finger tasks) - active_tasks: list[mink.Task] = [self.posture_task] + # Build active tasks list (finger tasks only) + active_tasks: list[mink.Task] = [] # Get palm transform and compute wrist position (consistent with compute_target_positions) # Recompute each iteration in case configuration changed @@ -388,8 +397,10 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray: task.set_target(target_se3) active_tasks.append(task) - # Solve IK - vel = mink.solve_ik(self.configuration, active_tasks, dt, solver, damping) + # Solve IK with limits (collision avoidance if enabled) + vel = mink.solve_ik( + self.configuration, active_tasks, dt, solver, damping, limits=self.limits + ) # Integrate velocity to update configuration self.configuration.integrate_inplace(vel, dt) diff --git a/scripts/visualize_tracker_ik_sites.py b/scripts/visualize_tracker_ik_sites.py new file mode 100644 index 0000000..817ef92 --- /dev/null +++ b/scripts/visualize_tracker_ik_sites.py @@ -0,0 +1,539 @@ +"""Visualize tracker sites and IK target sites on ORCA hand model. + +This script overlays: +1. Tracker sites (from MediaPipe/HaMeR hand tracking) - shown as colored spheres +2. IK target sites (on ORCA hand model) - shown as different colored spheres + +Both are color-coded and labeled for easy comparison. +""" + +import argparse +import multiprocessing as mp +import sys +import time +import xml.etree.ElementTree as ET +from pathlib import Path +from queue import Empty +from typing import Protocol + +import cv2 +import mujoco +import mujoco.viewer +import numpy as np + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from handpose.ik_retargeting import ( + FINGER_TARGET_BODIES, + MP_LANDMARK_INDICES, + ORCAHandIKConfig, + ORCAHandIKRetargeting, +) +from handpose.tracker.base import HandStructure +from handpose.tracker.hamer import HaMeRTracker +from handpose.tracker.mediapipe import MediaPipeTracker + + +def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str: + """Inject mocap bodies for tracker sites and ensure IK target sites exist. + + Args: + mjcf_path: Path to MJCF model file + target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp) + + Returns: + Modified MJCF XML string + """ + tree = ET.parse(mjcf_path) + root = tree.getroot() + worldbody = root.find("worldbody") + + if worldbody is None: + raise ValueError("Could not find worldbody in MJCF") + + # Convert relative paths to absolute paths + model_dir = mjcf_path.parent + for asset in root.findall(".//asset"): + for mesh in asset.findall("mesh"): + file_attr = mesh.get("file") + if file_attr and not Path(file_attr).is_absolute(): + abs_path = (model_dir / file_attr).resolve() + mesh.set("file", str(abs_path)) + + # All IK target sites use blue for consistency + ik_site_color = "0 0 1 0.9" # Blue for all IK targets + + # First, ensure tip sites exist (create if they don't) + # Tip site offsets: [x, y, z] where z is upward direction + # Increased z offsets to move tips higher/more visible + tip_site_specs = { + "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])), # Keep as is - good alignment + "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])), # Increased from 0.028 + "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])), # Increased from 0.030 + "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])), # Increased from 0.029 + "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])), # Increased from 0.025 + } + + if "tip" in target_joint_types: + tip_sites_created = [] + for finger, (parent_body, offset) in tip_site_specs.items(): + body_elem = root.find(f".//body[@name='{parent_body}']") + if body_elem is None: + print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site") + continue + site_name = f"right_{finger}_tip_site" + existing_site = body_elem.find(f"./site[@name='{site_name}']") + if existing_site is not None: + # Update existing site - make it larger and blue + existing_site.set("type", "sphere") # Ensure type is set + existing_site.set("size", "0.005") # Larger for better visibility + existing_site.set("rgba", ik_site_color) + tip_sites_created.append(f"{finger} (updated)") + else: + # Create new tip site + site = ET.SubElement(body_elem, "site") + site.set("name", site_name) + site.set("type", "sphere") # Explicit type for visibility + site.set("pos", " ".join(f"{value:.5f}" for value in offset)) + site.set("size", "0.005") # Larger for better visibility + site.set("rgba", ik_site_color) + tip_sites_created.append(f"{finger} (created)") + print(f"Tip sites: {', '.join(tip_sites_created)}") + + # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms + allowed_types = set(target_joint_types) + markers_added = [] + for finger_name, bodies in FINGER_TARGET_BODIES.items(): + for joint_type, (frame_type, frame_name) in bodies.items(): + if joint_type not in allowed_types: + continue + + if frame_type == "body": + # Add a visual site to the body for visibility + body_elem = root.find(f".//body[@name='{frame_name}']") + if body_elem is not None: + # Check if marker site already exists + marker_site_name = f"ik_marker_{frame_name}" + existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']") + if existing_marker is not None: + # Update existing marker + existing_marker.set("size", "0.004") # Same size as tip sites + existing_marker.set("rgba", ik_site_color) + markers_added.append(f"{finger_name}_{joint_type} (updated)") + else: + # Create new marker site + site = ET.SubElement(body_elem, "site") + site.set("name", marker_site_name) + site.set("pos", "0 0 0") # At body origin + site.set("size", "0.004") # Same size as tip sites for consistency + site.set("rgba", ik_site_color) # Blue for all IK targets + markers_added.append(f"{finger_name}_{joint_type} (created)") + else: + print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}") + elif frame_type == "site": + # This is a tip site - find and update it (should already be handled above, but double-check) + found = False + for body in root.findall(".//body"): + site_elem = body.find(f"./site[@name='{frame_name}']") + if site_elem is not None: + # Update site to be visible and larger + site_elem.set("size", "0.005") # Larger for better visibility + site_elem.set("rgba", ik_site_color) + found = True + markers_added.append(f"{finger_name}_{joint_type} (site updated)") + break + if not found: + print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}") + + print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}") + + # Add mocap bodies for tracker sites (MediaPipe landmarks) + # All tracker sites use red for consistency - these represent the actual tracked hand + tracker_color = "1 0 0 0.9" # Red for all tracker sites (actual hand) + + allowed_types = set(target_joint_types) + for finger_name, mp_mapping in MP_LANDMARK_INDICES.items(): + for joint_type, mp_idx in mp_mapping.items(): + if joint_type not in allowed_types: + continue + + # Create mocap body for tracker site + mocap_body = ET.SubElement(worldbody, "body") + mocap_body.set("name", f"tracker_{finger_name}_{joint_type}") + mocap_body.set("mocap", "true") + mocap_body.set("pos", "0 0 0") + + # Add visual sphere for tracker site (larger, red) + geom = ET.SubElement(mocap_body, "geom") + geom.set("type", "sphere") + geom.set("size", "0.005") # 5mm radius (larger than IK targets for visibility) + geom.set("rgba", tracker_color) # Red for all tracker sites + geom.set("contype", "0") + geom.set("conaffinity", "0") + + return ET.tostring(root, encoding="unicode") + + +def hand_structure_to_landmarks(structure: HandStructure) -> np.ndarray: + """Convert HandStructure to 21x3 landmarks array (MediaPipe format). + + Returns landmarks in hand frame (wrist at origin). + """ + landmarks = np.zeros((21, 3)) + + # Wrist (index 0) + landmarks[0] = structure.wrist_position + + # Thumb: CMC(1), MCP(2), IP(3), tip(4) + landmarks[1] = structure.thumb.mcp # CMC approximate + landmarks[2] = structure.thumb.mcp + landmarks[3] = structure.thumb.ip if structure.thumb.ip is not None else structure.thumb.mcp + landmarks[4] = structure.thumb.tip + + # Index: MCP(5), PIP(6), DIP(7), tip(8) + landmarks[5] = structure.index.mcp + landmarks[6] = structure.index.pip if structure.index.pip is not None else structure.index.mcp + landmarks[7] = structure.index.dip if structure.index.dip is not None else structure.index.tip + landmarks[8] = structure.index.tip + + # Middle: MCP(9), PIP(10), DIP(11), tip(12) + landmarks[9] = structure.middle.mcp + landmarks[10] = structure.middle.pip if structure.middle.pip is not None else structure.middle.mcp + landmarks[11] = structure.middle.dip if structure.middle.dip is not None else structure.middle.tip + landmarks[12] = structure.middle.tip + + # Ring: MCP(13), PIP(14), DIP(15), tip(16) + landmarks[13] = structure.ring.mcp + landmarks[14] = structure.ring.pip if structure.ring.pip is not None else structure.ring.mcp + landmarks[15] = structure.ring.dip if structure.ring.dip is not None else structure.ring.tip + landmarks[16] = structure.ring.tip + + # Pinky: MCP(17), PIP(18), DIP(19), tip(20) + landmarks[17] = structure.pinky.mcp + landmarks[18] = structure.pinky.pip if structure.pinky.pip is not None else structure.pinky.mcp + landmarks[19] = structure.pinky.dip if structure.pinky.dip is not None else structure.pinky.tip + landmarks[20] = structure.pinky.tip + + return landmarks + + +def update_tracker_sites( + model: mujoco.MjModel, + data: mujoco.MjData, + hand_structure: HandStructure, + target_joint_types: tuple[str, ...], + ik_solver: ORCAHandIKRetargeting, +) -> None: + """Update mocap body positions for tracker sites based on hand structure. + + Args: + model: MuJoCo model + data: MuJoCo data + hand_structure: Tracked hand structure + target_joint_types: Joint types to visualize + ik_solver: IK solver (for coordinate transformation) + """ + landmarks_hand_frame = hand_structure_to_landmarks(hand_structure) + + # Get palm transform to convert hand frame to world frame + t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body") + p_palm = t_palm.translation() + r_palm = t_palm.rotation().as_matrix() + + # Wrist offset in palm frame + wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm + p_wrist = p_palm + wrist_offset_world + + # Scale factor + scale = ik_solver.config.scale_factor + + # Coordinate transformation + coord_transform = ik_solver.config.coord_transform + + allowed_types = set(target_joint_types) + for finger_name, mp_mapping in MP_LANDMARK_INDICES.items(): + for joint_type, mp_idx in mp_mapping.items(): + if joint_type not in allowed_types: + continue + + # Get landmark position in hand frame + landmark_pos_hand_frame = landmarks_hand_frame[mp_idx] + + # Scale to robot size + scaled_vec = landmark_pos_hand_frame * scale + + # Transform to world frame (anchor to wrist) + target_pos_world = p_wrist + scaled_vec + + # Apply coordinate transformation + rel_vec = target_pos_world - p_wrist + transformed_vec = coord_transform @ rel_vec + final_pos = p_wrist + transformed_vec + + # Update mocap body position + mocap_body_name = f"tracker_{finger_name}_{joint_type}" + body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, mocap_body_name) + if body_id >= 0: + # Get the mocap index for this body (mocap bodies have mocapid >= 0) + mocap_idx = model.body_mocapid[body_id] + if mocap_idx >= 0 and mocap_idx < model.nmocap: + data.mocap_pos[mocap_idx] = final_pos + + +def visualize_ik_target_sites( + model: mujoco.MjModel, + data: mujoco.MjData, + target_joint_types: tuple[str, ...], +) -> dict[str, dict[str, np.ndarray]]: + """Get positions of IK target sites on ORCA hand. + + Args: + model: MuJoCo model + data: MuJoCo data + target_joint_types: Joint types to visualize + + Returns: + Dictionary mapping finger names to joint_type -> position + """ + ik_positions = {} + allowed_types = set(target_joint_types) + + for finger_name, bodies in FINGER_TARGET_BODIES.items(): + finger_positions = {} + for joint_type, (frame_type, frame_name) in bodies.items(): + if joint_type not in allowed_types: + continue + + obj_type = mujoco.mjtObj.mjOBJ_BODY if frame_type == "body" else mujoco.mjtObj.mjOBJ_SITE + frame_id = mujoco.mj_name2id(model, obj_type, frame_name) + + if frame_id >= 0: + if obj_type == mujoco.mjtObj.mjOBJ_SITE: + pos = data.site(frame_id).xpos.copy() + else: + pos = data.body(frame_id).xpos.copy() + finger_positions[joint_type] = pos + + if finger_positions: + ik_positions[finger_name] = finger_positions + + return ik_positions + + +class Flag(Protocol): + value: int + + +def display_feed_process(frame_queue: mp.Queue, running_flag: Flag, window_name: str) -> None: + """Display frames in a separate process to avoid GLFW/OpenCV conflicts.""" + while True: + if not running_flag.value and frame_queue.empty(): + break + + try: + frame = frame_queue.get(timeout=0.05) + except Empty: + if not running_flag.value: + break + continue + + cv2.imshow(window_name, frame) + key = cv2.waitKey(1) & 0xFF + if key == ord("q"): + running_flag.value = 0 + break + + cv2.destroyAllWindows() + + +def main() -> None: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Visualize tracker sites and IK target sites on ORCA hand", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("--camera", type=int, default=0, help="Camera device index") + parser.add_argument( + "--model", + type=str, + default="models/orca_hand_fixed.mjcf", + help="Path to MuJoCo MJCF model file", + ) + parser.add_argument( + "--tracker", + type=str, + default="hamer", + choices=["hamer", "mediapipe"], + help="Hand tracking backend", + ) + parser.add_argument( + "--targets", + type=str, + default="tip, ip", + help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip", + ) + parser.add_argument( + "--scale", + type=float, + default=1.0, + help="Hand scale factor (robot/human)", + ) + parser.add_argument( + "--show-feed", + action="store_true", + help="Show camera feed with hand tracking overlay", + ) + args = parser.parse_args() + + # Parse target joint types + raw_targets = [part.strip().lower() for part in args.targets.split(",")] + target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt)) + if not target_joints: + target_joints = ("tip",) + supported_targets = {"tip", "ip", "pip", "mcp"} + invalid = [jt for jt in target_joints if jt not in supported_targets] + if invalid: + parser.error(f"Unsupported target joint types: {', '.join(invalid)}") + + # Load model + model_file = Path(args.model) + if not model_file.is_absolute(): + model_file = Path(__file__).parent.parent / args.model + if not model_file.exists(): + raise FileNotFoundError(f"Model file not found: {model_file}") + + print(f"Loading model from {model_file}") + xml_string = inject_visualization_bodies(model_file, target_joints) + model = mujoco.MjModel.from_xml_string(xml_string) + data = mujoco.MjData(model) + + # Verify tip sites exist in the loaded model + if "tip" in target_joints: + print("\nVerifying tip sites in loaded model:") + for finger in ["thumb", "index", "middle", "ring", "pinky"]: + site_name = f"right_{finger}_tip_site" + site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name) + if site_id >= 0: + site_pos = data.site(site_id).xpos.copy() + print(f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]") + else: + print(f" ✗ {site_name}: NOT FOUND in model!") + + # Initialize tracker + if args.tracker == "hamer": + tracker = HaMeRTracker() + else: + tracker = MediaPipeTracker() + + # Initialize IK solver + ik_config = ORCAHandIKConfig( + scale_factor=args.scale, + target_joint_types=target_joints, + lm_damping=0.5 + ) + ik_solver = ORCAHandIKRetargeting(model, config=ik_config) + + # Open camera + cap = cv2.VideoCapture(args.camera) + if not cap.isOpened(): + raise RuntimeError(f"Failed to open camera {args.camera}") + + print("\n" + "=" * 70) + print("VISUALIZATION GUIDE:") + print("=" * 70) + print("TRACKER SITES (larger RED spheres - actual tracked hand from camera):") + print(" Red spheres = All tracker sites (thumb, index, middle, ring, pinky)") + print(" These represent the actual human hand position") + print("\nIK TARGET SITES (smaller BLUE spheres - on ORCA hand model):") + print(" Blue spheres = All IK target sites (thumb, index, middle, ring, pinky)") + print(" These represent where the robot hand should move") + print("\nNOTE: RED spheres (tracker) should align with BLUE spheres (IK targets)") + print(" when IK is solving correctly. RED = actual hand, BLUE = robot targets.") + print("\nPress 'q' in MuJoCo viewer to quit") + print("=" * 70 + "\n") + + start_time = time.time() + + # Set up multiprocessing for display window if requested + display_process: mp.Process | None = None + frame_queue: mp.Queue | None = None + running_flag: Flag | None = None + + if args.show_feed: + print("Starting camera feed display in separate process...") + frame_queue = mp.Queue(maxsize=2) # Small buffer to avoid lag + running_flag = mp.Value("i", 1) + display_process = mp.Process( + target=display_feed_process, + args=(frame_queue, running_flag, "Hand Tracking Feed"), + ) + display_process.start() + + try: + with mujoco.viewer.launch_passive(model, data) as viewer: + while viewer.is_running() and cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + timestamp = time.time() - start_time + hand_structures = tracker.detect_hands(frame, timestamp=timestamp) + + # Send frame to display process if enabled + if args.show_feed and frame_queue is not None and running_flag is not None: + if hand_structures: + vis_frame = tracker.visualize(frame, hand_structures) + else: + vis_frame = frame.copy() + try: + frame_queue.put_nowait(vis_frame) + except Exception: + pass # Queue full, skip this frame + + if hand_structures: + hand = hand_structures[0] + + # Update IK solver configuration + mujoco.mj_forward(model, data) + ik_solver.configuration.update(data.qpos) + + # Solve IK + qpos = ik_solver.solve(hand) + if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)): + data.qpos[:] = qpos + data.qvel[:] = 0 + + # Update tracker site positions + update_tracker_sites(model, data, hand, target_joints, ik_solver) + + # Forward kinematics to update IK target site positions + mujoco.mj_forward(model, data) + + # Get IK target positions (sites are automatically rendered by MuJoCo) + ik_positions = visualize_ik_target_sites(model, data, target_joints) + + # Print current positions for debugging (first frame only) + if hand_structures: + print("\nCurrent IK Target Positions:") + for finger, positions in ik_positions.items(): + for joint_type, pos in positions.items(): + print(f" {finger}_{joint_type}: [{pos[0]:.4f}, {pos[1]:.4f}, {pos[2]:.4f}]") + + # Sync viewer + viewer.sync() + + finally: + # Clean up display process + if running_flag is not None: + running_flag.value = 0 + if display_process is not None: + display_process.join(timeout=1.0) + if display_process.is_alive(): + display_process.terminate() + cap.release() + print("\nVisualization closed.") + + +if __name__ == "__main__": + main() From 3743cb47fceb4db6bc423b2bc5ee0ee61f5d897a Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 3 Feb 2026 04:19:15 -0500 Subject: [PATCH 13/17] lint --- handpose/ik_retargeting.py | 4 +--- pyproject.toml | 1 + scripts/visualize_tracker_ik_sites.py | 19 ++++++++----------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/handpose/ik_retargeting.py b/handpose/ik_retargeting.py index 715cfa7..5fa3968 100644 --- a/handpose/ik_retargeting.py +++ b/handpose/ik_retargeting.py @@ -398,9 +398,7 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray: active_tasks.append(task) # Solve IK with limits (collision avoidance if enabled) - vel = mink.solve_ik( - self.configuration, active_tasks, dt, solver, damping, limits=self.limits - ) + vel = mink.solve_ik(self.configuration, active_tasks, dt, solver, damping, limits=self.limits) # Integrate velocity to update configuration self.configuration.integrate_inplace(vel, dt) diff --git a/pyproject.toml b/pyproject.toml index afe77f0..59a84fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ module = [ 'sensor_msgs.*', 'h5py', "colorlogging", + 'mink.*', ] ignore_missing_imports = true diff --git a/scripts/visualize_tracker_ik_sites.py b/scripts/visualize_tracker_ik_sites.py index 817ef92..d7c1acf 100644 --- a/scripts/visualize_tracker_ik_sites.py +++ b/scripts/visualize_tracker_ik_sites.py @@ -64,15 +64,13 @@ def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, # All IK target sites use blue for consistency ik_site_color = "0 0 1 0.9" # Blue for all IK targets - # First, ensure tip sites exist (create if they don't) # Tip site offsets: [x, y, z] where z is upward direction - # Increased z offsets to move tips higher/more visible tip_site_specs = { - "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])), # Keep as is - good alignment - "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])), # Increased from 0.028 - "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])), # Increased from 0.030 - "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])), # Increased from 0.029 - "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])), # Increased from 0.025 + "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])), + "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])), + "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])), + "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])), + "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])), } if "tip" in target_joint_types: @@ -87,7 +85,7 @@ def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, if existing_site is not None: # Update existing site - make it larger and blue existing_site.set("type", "sphere") # Ensure type is set - existing_site.set("size", "0.005") # Larger for better visibility + existing_site.set("size", "0.005") existing_site.set("rgba", ik_site_color) tip_sites_created.append(f"{finger} (updated)") else: @@ -96,7 +94,7 @@ def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, site.set("name", site_name) site.set("type", "sphere") # Explicit type for visibility site.set("pos", " ".join(f"{value:.5f}" for value in offset)) - site.set("size", "0.005") # Larger for better visibility + site.set("size", "0.005") site.set("rgba", ik_site_color) tip_sites_created.append(f"{finger} (created)") print(f"Tip sites: {', '.join(tip_sites_created)}") @@ -137,8 +135,7 @@ def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, for body in root.findall(".//body"): site_elem = body.find(f"./site[@name='{frame_name}']") if site_elem is not None: - # Update site to be visible and larger - site_elem.set("size", "0.005") # Larger for better visibility + site_elem.set("size", "0.005") site_elem.set("rgba", ik_site_color) found = True markers_added.append(f"{finger_name}_{joint_type} (site updated)") From 3211b713a503dc75434af9addb4e1adb8f0787b6 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 3 Feb 2026 05:13:47 -0500 Subject: [PATCH 14/17] add recorder, improve ik --- examples/handpose_ros2_publisher.py | 150 ++++- examples/live_demo_ik.py | 196 ++++-- models/orca_hand_fixed.mjcf | 9 +- scripts/record_and_process_video.py | 862 ++++++++++++++++++++++++++ scripts/visualize_tracker_ik_sites.py | 31 +- 5 files changed, 1151 insertions(+), 97 deletions(-) create mode 100644 scripts/record_and_process_video.py diff --git a/examples/handpose_ros2_publisher.py b/examples/handpose_ros2_publisher.py index cd20b30..7e7f9fe 100644 --- a/examples/handpose_ros2_publisher.py +++ b/examples/handpose_ros2_publisher.py @@ -31,6 +31,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from handpose.ik_retargeting import ( # noqa: E402 + FINGER_TARGET_BODIES, ORCA_JOINT_NAMES, ORCAHandIKConfig, ORCAHandIKRetargeting, @@ -38,10 +39,15 @@ from handpose.tracker.hamer import HaMeRTracker # noqa: E402 -def inject_target_bodies(mjcf_path: Path) -> str: - """Injects mocap bodies and tip sites into the MJCF XML string for IK targeting. +def inject_target_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str: + """Inject mocap bodies for tracker sites and ensure IK target sites exist. - This matches the approach in live_demo_ik.py to ensure tip sites exist for all fingers. + Args: + mjcf_path: Path to MJCF model file + target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp) + + Returns: + Modified MJCF XML string """ tree = ET.parse(mjcf_path) root = tree.getroot() @@ -51,37 +57,99 @@ def inject_target_bodies(mjcf_path: Path) -> str: raise ValueError("Could not find worldbody in MJCF") # Convert relative paths to absolute paths - # MuJoCo can't resolve relative paths when loading from string model_dir = mjcf_path.parent for asset in root.findall(".//asset"): for mesh in asset.findall("mesh"): file_attr = mesh.get("file") if file_attr and not Path(file_attr).is_absolute(): - # Convert relative path to absolute abs_path = (model_dir / file_attr).resolve() mesh.set("file", str(abs_path)) - # Add tip sites for all fingers (required for tip tracking) + # All IK target sites use blue for consistency + ik_site_color = "0 0 1 0.9" # Blue for all IK targets + + # Tip site offsets: [x, y, z] where z is upward direction tip_site_specs = { - "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.018])), - "index": ("right_index_ip", np.array([0.0, 0.0, 0.020])), - "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.022])), - "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.021])), - "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.018])), + "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])), + "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])), + "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])), + "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])), + "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])), } - for finger, (parent_body, offset) in tip_site_specs.items(): - body_elem = root.find(f".//body[@name='{parent_body}']") - if body_elem is None: - continue - site_name = f"right_{finger}_tip_site" - if body_elem.find(f"./site[@name='{site_name}']") is not None: - continue - site = ET.SubElement(body_elem, "site") - site.set("name", site_name) - site.set("pos", " ".join(f"{value:.5f}" for value in offset)) - site.set("size", "0.0025") - site.set("rgba", "1 0.6 0.2 0.8") + if "tip" in target_joint_types: + tip_sites_created = [] + for finger, (parent_body, offset) in tip_site_specs.items(): + body_elem = root.find(f".//body[@name='{parent_body}']") + if body_elem is None: + print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site") + continue + site_name = f"right_{finger}_tip_site" + existing_site = body_elem.find(f"./site[@name='{site_name}']") + if existing_site is not None: + # Update existing site - make it larger and blue + existing_site.set("type", "sphere") # Ensure type is set + existing_site.set("size", "0.005") + existing_site.set("rgba", ik_site_color) + tip_sites_created.append(f"{finger} (updated)") + else: + # Create new tip site + site = ET.SubElement(body_elem, "site") + site.set("name", site_name) + site.set("type", "sphere") # Explicit type for visibility + site.set("pos", " ".join(f"{value:.5f}" for value in offset)) + site.set("size", "0.005") + site.set("rgba", ik_site_color) + tip_sites_created.append(f"{finger} (created)") + if tip_sites_created: + print(f"Tip sites: {', '.join(tip_sites_created)}") + + # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms + allowed_types = set(target_joint_types) + markers_added = [] + for finger_name, bodies in FINGER_TARGET_BODIES.items(): + for joint_type, (frame_type, frame_name) in bodies.items(): + if joint_type not in allowed_types: + continue + + if frame_type == "body": + # Add a visual site to the body for visibility + body_elem = root.find(f".//body[@name='{frame_name}']") + if body_elem is not None: + # Check if marker site already exists + marker_site_name = f"ik_marker_{frame_name}" + existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']") + if existing_marker is not None: + # Update existing marker + existing_marker.set("size", "0.004") # Same size as tip sites + existing_marker.set("rgba", ik_site_color) + markers_added.append(f"{finger_name}_{joint_type} (updated)") + else: + # Create new marker site + site = ET.SubElement(body_elem, "site") + site.set("name", marker_site_name) + site.set("pos", "0 0 0") # At body origin + site.set("size", "0.004") # Same size as tip sites for consistency + site.set("rgba", ik_site_color) # Blue for all IK targets + markers_added.append(f"{finger_name}_{joint_type} (created)") + else: + print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}") + elif frame_type == "site": + # This is a tip site - find and update it (should already be handled above, but double-check) + found = False + for body in root.findall(".//body"): + site_elem = body.find(f"./site[@name='{frame_name}']") + if site_elem is not None: + site_elem.set("size", "0.005") + site_elem.set("rgba", ik_site_color) + found = True + markers_added.append(f"{finger_name}_{joint_type} (site updated)") + break + if not found: + print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}") + + if markers_added: + print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}") return ET.tostring(root, encoding="unicode") @@ -101,7 +169,7 @@ def __init__( hold_last: bool = False, width: int = 1280, height: int = 720, - joint_smoothing: float = 1.0, + joint_smoothing: float = 0.7, target_joint_types: tuple[str, ...] = ("tip", "ip"), ) -> None: """Initialize the HandPose ROS2 publisher. @@ -117,7 +185,7 @@ def __init__( hold_last: If True, publish last valid joint values when hand is not detected width: Camera frame width height: Camera frame height - joint_smoothing: Smoothing factor for joint positions (0.0-1.0). Default: 1.0 (no smoothing) + joint_smoothing: Smoothing factor for joint positions (0.0-1.0). Default: 0.7 target_joint_types: Tuple of joint types to target for IK (tip, ip, pip, mcp). Default: ("tip", "ip") """ super().__init__("handpose_hamer_publisher") @@ -148,17 +216,31 @@ def __init__( raise FileNotFoundError(f"Model file not found: {model_file}") self.get_logger().info(f"Loading MuJoCo model from {model_file}") - # Inject tip sites into MJCF (required for tip tracking, matches live_demo_ik.py) - xml_string = inject_target_bodies(model_file) + # Inject tip sites into MJCF (required for tip tracking) + xml_string = inject_target_bodies(model_file, target_joint_types) self.model = mujoco.MjModel.from_xml_string(xml_string) + # Verify tip sites exist in the loaded model + if "tip" in target_joint_types: + self.get_logger().info("Verifying tip sites in loaded model:") + data_temp = mujoco.MjData(self.model) + for finger in ["thumb", "index", "middle", "ring", "pinky"]: + site_name = f"right_{finger}_tip_site" + site_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, site_name) + if site_id >= 0: + site_pos = data_temp.site(site_id).xpos.copy() + self.get_logger().info( + f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]" + ) + else: + self.get_logger().warn(f" ✗ {site_name}: NOT FOUND in model!") + # IK retargeting - # Use the same wrist_offset_palm as live_demo_ik.py to match USD model coordinate frame - # The default MJCF offset [0.002, -0.00144, -0.03872] may not match the USD model ik_cfg = ORCAHandIKConfig( scale_factor=scale, target_joint_types=target_joint_types, - wrist_offset_palm=np.array([0.000, 0.0, -0.05]), # Match live_demo_ik.py + lm_damping=0.5, + ik_iterations=15, ) self.ik = ORCAHandIKRetargeting(self.model, config=ik_cfg) @@ -334,15 +416,15 @@ def main() -> None: parser.add_argument( "--joint-smoothing", type=float, - default=1.0, + default=0.7, help="Smoothing factor for joint positions (0.0-1.0). \ - Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", + Lower values = more smoothing, higher = less smoothing. Default: 0.7", ) parser.add_argument( "--targets", type=str, - default="tip, ip", - help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip", + default="tip", + help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip", ) args = parser.parse_args() diff --git a/examples/live_demo_ik.py b/examples/live_demo_ik.py index 1804d50..be9a7a9 100644 --- a/examples/live_demo_ik.py +++ b/examples/live_demo_ik.py @@ -53,8 +53,16 @@ def dual_window_process(frame_queue: mp.Queue, running_flag: Flag, window_name: cv2.destroyAllWindows() -def inject_target_bodies(mjcf_path: Path) -> str: - """Injects mocap bodies into the MJCF XML string for visualization.""" +def inject_target_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str: + """Inject mocap bodies for tracker sites and ensure IK target sites exist. + + Args: + mjcf_path: Path to MJCF model file + target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp) + + Returns: + Modified MJCF XML string + """ tree = ET.parse(mjcf_path) root = tree.getroot() worldbody = root.find("worldbody") @@ -63,60 +71,121 @@ def inject_target_bodies(mjcf_path: Path) -> str: raise ValueError("Could not find worldbody in MJCF") # Convert relative paths to absolute paths - # MuJoCo can't resolve relative paths when loading from string model_dir = mjcf_path.parent for asset in root.findall(".//asset"): for mesh in asset.findall("mesh"): file_attr = mesh.get("file") if file_attr and not Path(file_attr).is_absolute(): - # Convert relative path to absolute abs_path = (model_dir / file_attr).resolve() mesh.set("file", str(abs_path)) - # Add mocap bodies for all keypoints (MCP, PIP, TIP, and IP for thumb) - # Use different colors for different joint types - joint_colors = { - "mcp": "0 1 0 0.7", # Green for MCP - "pip": "0 0 1 0.7", # Blue for PIP - "ip": "1 1 0 0.7", # Yellow for IP (thumb only) - "tip": "1 0 0 0.7", # Red for TIP + # All IK target sites use blue for consistency + ik_site_color = "0 0 1 0.9" # Blue for all IK targets + + # Tip site offsets: [x, y, z] where z is upward direction + tip_site_specs = { + "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])), + "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])), + "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])), + "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])), + "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])), } - for finger, joints in FINGER_TARGET_BODIES.items(): - for joint_type in joints.keys(): - body = ET.SubElement(worldbody, "body") - body.set("name", f"target_{finger}_{joint_type}") - body.set("mocap", "true") - body.set("pos", "0 0 0") + if "tip" in target_joint_types: + tip_sites_created = [] + for finger, (parent_body, offset) in tip_site_specs.items(): + body_elem = root.find(f".//body[@name='{parent_body}']") + if body_elem is None: + print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site") + continue + site_name = f"right_{finger}_tip_site" + existing_site = body_elem.find(f"./site[@name='{site_name}']") + if existing_site is not None: + # Update existing site - make it larger and blue + existing_site.set("type", "sphere") # Ensure type is set + existing_site.set("size", "0.005") + existing_site.set("rgba", ik_site_color) + tip_sites_created.append(f"{finger} (updated)") + else: + # Create new tip site + site = ET.SubElement(body_elem, "site") + site.set("name", site_name) + site.set("type", "sphere") # Explicit type for visibility + site.set("pos", " ".join(f"{value:.5f}" for value in offset)) + site.set("size", "0.005") + site.set("rgba", ik_site_color) + tip_sites_created.append(f"{finger} (created)") + print(f"Tip sites: {', '.join(tip_sites_created)}") + + # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms + allowed_types = set(target_joint_types) + markers_added = [] + for finger_name, bodies in FINGER_TARGET_BODIES.items(): + for joint_type, (frame_type, frame_name) in bodies.items(): + if joint_type not in allowed_types: + continue - # Add a visual sphere (smaller than before) - geom = ET.SubElement(body, "geom") - geom.set("type", "sphere") - geom.set("size", "0.003") # 3mm radius (smaller) - geom.set("rgba", joint_colors.get(joint_type, "0.5 0.5 0.5 0.7")) - geom.set("contype", "0") # No collision - geom.set("conaffinity", "0") + if frame_type == "body": + # Add a visual site to the body for visibility + body_elem = root.find(f".//body[@name='{frame_name}']") + if body_elem is not None: + # Check if marker site already exists + marker_site_name = f"ik_marker_{frame_name}" + existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']") + if existing_marker is not None: + # Update existing marker + existing_marker.set("size", "0.004") # Same size as tip sites + existing_marker.set("rgba", ik_site_color) + markers_added.append(f"{finger_name}_{joint_type} (updated)") + else: + # Create new marker site + site = ET.SubElement(body_elem, "site") + site.set("name", marker_site_name) + site.set("pos", "0 0 0") # At body origin + site.set("size", "0.004") # Same size as tip sites for consistency + site.set("rgba", ik_site_color) # Blue for all IK targets + markers_added.append(f"{finger_name}_{joint_type} (created)") + else: + print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}") + elif frame_type == "site": + # This is a tip site - find and update it (should already be handled above, but double-check) + found = False + for body in root.findall(".//body"): + site_elem = body.find(f"./site[@name='{frame_name}']") + if site_elem is not None: + site_elem.set("size", "0.005") + site_elem.set("rgba", ik_site_color) + found = True + markers_added.append(f"{finger_name}_{joint_type} (site updated)") + break + if not found: + print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}") + + print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}") + + # Add mocap bodies for tracker sites (MediaPipe landmarks) + # All tracker sites use red for consistency - these represent the actual tracked hand + tracker_color = "1 0 0 0.9" # Red for all tracker sites (actual hand) + + allowed_types = set(target_joint_types) + for finger_name, mp_mapping in MP_LANDMARK_INDICES.items(): + for joint_type, mp_idx in mp_mapping.items(): + if joint_type not in allowed_types: + continue - tip_site_specs = { - "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.018])), - "index": ("right_index_ip", np.array([0.0, 0.0, 0.020])), - "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.022])), - "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.021])), - "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.018])), - } + # Create mocap body for tracker site + mocap_body = ET.SubElement(worldbody, "body") + mocap_body.set("name", f"tracker_{finger_name}_{joint_type}") + mocap_body.set("mocap", "true") + mocap_body.set("pos", "0 0 0") - for finger, (parent_body, offset) in tip_site_specs.items(): - body_elem = root.find(f".//body[@name='{parent_body}']") - if body_elem is None: - continue - site_name = f"right_{finger}_tip_site" - if body_elem.find(f"./site[@name='{site_name}']") is not None: - continue - site = ET.SubElement(body_elem, "site") - site.set("name", site_name) - site.set("pos", " ".join(f"{value:.5f}" for value in offset)) - site.set("size", "0.0025") - site.set("rgba", "1 0.6 0.2 0.8") + # Add visual sphere for tracker site (larger, red) + geom = ET.SubElement(mocap_body, "geom") + geom.set("type", "sphere") + geom.set("size", "0.005") # 5mm radius (larger than IK targets for visibility) + geom.set("rgba", tracker_color) # Red for all tracker sites + geom.set("contype", "0") + geom.set("conaffinity", "0") return ET.tostring(root, encoding="unicode") @@ -396,7 +465,7 @@ def main() -> None: parser.add_argument( "--targets", type=str, - default="tip, ip", + default="tip", help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip", ) parser.add_argument( @@ -415,9 +484,9 @@ def main() -> None: parser.add_argument( "--joint-smoothing", type=float, - default=1.0, + default=0.7, help="Smoothing factor for joint positions (0.0-1.0). \ - Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)", + Lower values = more smoothing, higher = less smoothing. Default: 0.7", ) args = parser.parse_args() @@ -431,16 +500,15 @@ def main() -> None: parser.error(f"Unsupported target joint types: {', '.join(invalid)}") # 1. Setup paths and model - script_dir = Path(__file__).parent - model_path = script_dir.parent / "models" / args.model if not Path(args.model).is_absolute() else Path(args.model) - - if not model_path.exists(): - print(f"Error: Model not found at {model_path}") - return + model_file = Path(args.model) + if not model_file.is_absolute(): + model_file = Path(__file__).parent.parent / args.model + if not model_file.exists(): + raise FileNotFoundError(f"Model file not found: {model_file}") # 2. Inject visualization bodies (Mocap) - print("Injecting visualization targets...") - xml_string = inject_target_bodies(model_path) + print(f"Loading model from {model_file}") + xml_string = inject_target_bodies(model_file, target_joints) # 3. Load MuJoCo Model model = mujoco.MjModel.from_xml_string(xml_string) @@ -457,22 +525,38 @@ def main() -> None: ik_config = ORCAHandIKConfig( scale_factor=args.scale, - wrist_offset_palm=np.array([0.000, 0.0, -0.05]), target_joint_types=target_joints, + lm_damping=0.5, + ik_iterations=15, ) ik_solver = ORCAHandIKRetargeting(model, config=ik_config) + # Verify tip sites exist in the loaded model + if "tip" in target_joints: + print("\nVerifying tip sites in loaded model:") + for finger in ["thumb", "index", "middle", "ring", "pinky"]: + site_name = f"right_{finger}_tip_site" + site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name) + if site_id >= 0: + site_pos = data.site(site_id).xpos.copy() + print(f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]") + else: + print(f" ✗ {site_name}: NOT FOUND in model!") + allowed_joint_types = set(target_joints) # 5. Helper to map finger names and joint types to mocap body IDs + # Note: These use "tracker_" prefix to match the visualization bodies created by inject_target_bodies target_body_ids: dict[str, dict[str, int]] = {} for finger, joints in FINGER_TARGET_BODIES.items(): for joint_type in joints.keys(): if joint_type not in allowed_joint_types: continue finger_dict = target_body_ids.setdefault(finger, {}) - bid = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, f"target_{finger}_{joint_type}") + bid = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, f"tracker_{finger}_{joint_type}") + if bid < 0: + print(f"Warning: Mocap body 'tracker_{finger}_{joint_type}' not found") finger_dict[joint_type] = bid # 6. Main Loop diff --git a/models/orca_hand_fixed.mjcf b/models/orca_hand_fixed.mjcf index 89209ae..2c48d66 100644 --- a/models/orca_hand_fixed.mjcf +++ b/models/orca_hand_fixed.mjcf @@ -3,6 +3,11 @@ + + + + + @@ -26,8 +31,8 @@ - - + + diff --git a/scripts/record_and_process_video.py b/scripts/record_and_process_video.py new file mode 100644 index 0000000..fd54852 --- /dev/null +++ b/scripts/record_and_process_video.py @@ -0,0 +1,862 @@ +"""Record video, process with HAMER, visualize in MuJoCo, and concatenate videos. + +This script: +1. Listens for key press ('r' or 's') to start recording +2. Records video from camera +3. Saves the recorded video +4. Processes video with HAMER frame by frame +5. Opens MuJoCo and visualizes the processed hand tracking +6. Records the MuJoCo visualization +7. Concatenates both videos together +""" + +import argparse +import sys +import time +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import Optional + +import cv2 +import mujoco +import mujoco.viewer +import numpy as np + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from handpose.ik_retargeting import ( + FINGER_TARGET_BODIES, + MP_LANDMARK_INDICES, + ORCAHandIKConfig, + ORCAHandIKRetargeting, +) +from handpose.tracker.base import HandStructure +from handpose.tracker.hamer import HaMeRTracker + + +def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str: + """Inject mocap bodies for tracker sites and ensure IK target sites exist.""" + tree = ET.parse(mjcf_path) + root = tree.getroot() + worldbody = root.find("worldbody") + + if worldbody is None: + raise ValueError("Could not find worldbody in MJCF") + + # Convert relative paths to absolute paths + model_dir = mjcf_path.parent + for asset in root.findall(".//asset"): + for mesh in asset.findall("mesh"): + file_attr = mesh.get("file") + if file_attr and not Path(file_attr).is_absolute(): + abs_path = (model_dir / file_attr).resolve() + mesh.set("file", str(abs_path)) + + # IK target sites use blue + ik_site_color = "0 0 1 0.9" + + # Tip site offsets + tip_site_specs = { + "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])), + "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])), + "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])), + "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])), + "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])), + } + + if "tip" in target_joint_types: + for finger, (parent_body, offset) in tip_site_specs.items(): + body_elem = root.find(f".//body[@name='{parent_body}']") + if body_elem is None: + continue + site_name = f"right_{finger}_tip_site" + existing_site = body_elem.find(f"./site[@name='{site_name}']") + if existing_site is not None: + existing_site.set("type", "sphere") + existing_site.set("size", "0.005") + existing_site.set("rgba", ik_site_color) + else: + site = ET.SubElement(body_elem, "site") + site.set("name", site_name) + site.set("type", "sphere") + site.set("pos", " ".join(f"{value:.5f}" for value in offset)) + site.set("size", "0.005") + site.set("rgba", ik_site_color) + + # Add visual markers for IK target bodies + allowed_types = set(target_joint_types) + for finger_name, bodies in FINGER_TARGET_BODIES.items(): + for joint_type, (frame_type, frame_name) in bodies.items(): + if joint_type not in allowed_types: + continue + + if frame_type == "body": + body_elem = root.find(f".//body[@name='{frame_name}']") + if body_elem is not None: + marker_site_name = f"ik_marker_{frame_name}" + existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']") + if existing_marker is not None: + existing_marker.set("size", "0.004") + existing_marker.set("rgba", ik_site_color) + else: + site = ET.SubElement(body_elem, "site") + site.set("name", marker_site_name) + site.set("pos", "0 0 0") + site.set("size", "0.004") + site.set("rgba", ik_site_color) + + # Add mocap bodies for tracker sites (red) + tracker_color = "1 0 0 0.9" + # Add wrist tracker body + wrist_body = ET.SubElement(worldbody, "body") + wrist_body.set("name", "tracker_wrist") + wrist_body.set("mocap", "true") + wrist_body.set("pos", "0 0 0") + wrist_geom = ET.SubElement(wrist_body, "geom") + wrist_geom.set("type", "sphere") + wrist_geom.set("size", "0.005") + wrist_geom.set("rgba", tracker_color) + wrist_geom.set("contype", "0") + wrist_geom.set("conaffinity", "0") + + for finger_name, mp_mapping in MP_LANDMARK_INDICES.items(): + for joint_type, mp_idx in mp_mapping.items(): + if joint_type not in allowed_types: + continue + + mocap_body = ET.SubElement(worldbody, "body") + mocap_body.set("name", f"tracker_{finger_name}_{joint_type}") + mocap_body.set("mocap", "true") + mocap_body.set("pos", "0 0 0") + + geom = ET.SubElement(mocap_body, "geom") + geom.set("type", "sphere") + geom.set("size", "0.005") + geom.set("rgba", tracker_color) + geom.set("contype", "0") + geom.set("conaffinity", "0") + + # Add skeleton lines (cylinders) between joints for skeleton overlay + skeleton_color = "0 1 0 0.8" # Green skeleton lines + skeleton_connections = [ + # Thumb chain + ("tracker_thumb_mcp", "tracker_thumb_ip"), + ("tracker_thumb_ip", "tracker_thumb_tip"), + # Index chain + ("tracker_index_mcp", "tracker_index_pip"), + ("tracker_index_pip", "tracker_index_tip"), + # Middle chain + ("tracker_middle_mcp", "tracker_middle_pip"), + ("tracker_middle_pip", "tracker_middle_tip"), + # Ring chain + ("tracker_ring_mcp", "tracker_ring_pip"), + ("tracker_ring_pip", "tracker_ring_tip"), + # Pinky chain + ("tracker_pinky_mcp", "tracker_pinky_pip"), + ("tracker_pinky_pip", "tracker_pinky_tip"), + ] + + # Add wrist connections (from wrist to each finger MCP) + wrist_connections = [ + ("tracker_index_mcp", "tracker_wrist"), + ("tracker_middle_mcp", "tracker_wrist"), + ("tracker_ring_mcp", "tracker_wrist"), + ("tracker_pinky_mcp", "tracker_wrist"), + ("tracker_thumb_mcp", "tracker_wrist"), + ] + + # Create skeleton line geoms (will be positioned dynamically) + for start_joint, end_joint in skeleton_connections + wrist_connections: + # Create a mocap body for the skeleton line + line_body = ET.SubElement(worldbody, "body") + line_body.set("name", f"skeleton_line_{start_joint}_{end_joint}") + line_body.set("mocap", "true") + line_body.set("pos", "0 0 0") + + # Add cylinder geom for the line + line_geom = ET.SubElement(line_body, "geom") + line_geom.set("type", "cylinder") + line_geom.set("size", "0.001 0.02") # Thin cylinder (radius, half-height) - will be updated dynamically + line_geom.set("rgba", skeleton_color) + line_geom.set("contype", "0") + line_geom.set("conaffinity", "0") + + return ET.tostring(root, encoding="unicode") + + +def hand_structure_to_landmarks(structure: HandStructure) -> np.ndarray: + """Convert HandStructure to 21x3 landmarks array (MediaPipe format).""" + landmarks = np.zeros((21, 3)) + + landmarks[0] = structure.wrist_position + landmarks[1] = structure.thumb.mcp + landmarks[2] = structure.thumb.mcp + landmarks[3] = structure.thumb.ip if structure.thumb.ip is not None else structure.thumb.mcp + landmarks[4] = structure.thumb.tip + landmarks[5] = structure.index.mcp + landmarks[6] = structure.index.pip if structure.index.pip is not None else structure.index.mcp + landmarks[7] = structure.index.dip if structure.index.dip is not None else structure.index.tip + landmarks[8] = structure.index.tip + landmarks[9] = structure.middle.mcp + landmarks[10] = structure.middle.pip if structure.middle.pip is not None else structure.middle.mcp + landmarks[11] = structure.middle.dip if structure.middle.dip is not None else structure.middle.tip + landmarks[12] = structure.middle.tip + landmarks[13] = structure.ring.mcp + landmarks[14] = structure.ring.pip if structure.ring.pip is not None else structure.ring.mcp + landmarks[15] = structure.ring.dip if structure.ring.dip is not None else structure.ring.tip + landmarks[16] = structure.ring.tip + landmarks[17] = structure.pinky.mcp + landmarks[18] = structure.pinky.pip if structure.pinky.pip is not None else structure.pinky.mcp + landmarks[19] = structure.pinky.dip if structure.pinky.dip is not None else structure.pinky.tip + landmarks[20] = structure.pinky.tip + + return landmarks + + +def get_joint_position_world( + model: mujoco.MjModel, + data: mujoco.MjData, + joint_name: str, + hand_structure: HandStructure, + ik_solver: ORCAHandIKRetargeting, + target_joint_types: tuple[str, ...], +) -> np.ndarray | None: + """Get world position of a joint by name.""" + # Check if it's a mocap body we can query directly + if joint_name.startswith("tracker_"): + body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, joint_name) + if body_id >= 0: + mocap_idx = model.body_mocapid[body_id] + if mocap_idx >= 0 and mocap_idx < model.nmocap: + return data.mocap_pos[mocap_idx].copy() + + # Fallback: calculate from hand structure + if joint_name in ("wrist", "tracker_wrist"): + t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body") + p_palm = t_palm.translation() + r_palm = t_palm.rotation().as_matrix() + wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm + return p_palm + wrist_offset_world + + return None + + +def update_skeleton_lines( + model: mujoco.MjModel, + data: mujoco.MjData, + hand_structure: HandStructure, + target_joint_types: tuple[str, ...], + ik_solver: ORCAHandIKRetargeting, +) -> None: + """Update skeleton line positions and orientations between joints.""" + skeleton_connections = [ + ("tracker_thumb_mcp", "tracker_thumb_ip"), + ("tracker_thumb_ip", "tracker_thumb_tip"), + ("tracker_index_mcp", "tracker_index_pip"), + ("tracker_index_pip", "tracker_index_tip"), + ("tracker_middle_mcp", "tracker_middle_pip"), + ("tracker_middle_pip", "tracker_middle_tip"), + ("tracker_ring_mcp", "tracker_ring_pip"), + ("tracker_ring_pip", "tracker_ring_tip"), + ("tracker_pinky_mcp", "tracker_pinky_pip"), + ("tracker_pinky_pip", "tracker_pinky_tip"), + ("tracker_index_mcp", "tracker_wrist"), + ("tracker_middle_mcp", "tracker_wrist"), + ("tracker_ring_mcp", "tracker_wrist"), + ("tracker_pinky_mcp", "tracker_wrist"), + ("tracker_thumb_mcp", "tracker_wrist"), + ] + + for start_joint, end_joint in skeleton_connections: + p1 = get_joint_position_world(model, data, start_joint, hand_structure, ik_solver, target_joint_types) + p2 = get_joint_position_world(model, data, end_joint, hand_structure, ik_solver, target_joint_types) + + if p1 is None or p2 is None: + continue + + # Calculate midpoint and direction + midpoint = (p1 + p2) / 2.0 + direction = p2 - p1 + length = np.linalg.norm(direction) + + if length < 1e-6: + continue + + direction = direction / length + + # Create rotation quaternion to align cylinder with direction + # Default cylinder axis is [0, 0, 1], we want it along direction + z_axis = np.array([0, 0, 1]) + if np.abs(np.dot(direction, z_axis)) > 0.99: + # Parallel to z-axis, use identity + quat = np.array([1, 0, 0, 0]) + else: + # Calculate rotation axis and angle + rot_axis = np.cross(z_axis, direction) + rot_axis = rot_axis / np.linalg.norm(rot_axis) + cos_angle = np.dot(z_axis, direction) + angle = np.arccos(np.clip(cos_angle, -1, 1)) + quat = np.array([ + np.cos(angle / 2), + rot_axis[0] * np.sin(angle / 2), + rot_axis[1] * np.sin(angle / 2), + rot_axis[2] * np.sin(angle / 2), + ]) + + # Update mocap body position and orientation + line_body_name = f"skeleton_line_{start_joint}_{end_joint}" + body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, line_body_name) + if body_id >= 0: + mocap_idx = model.body_mocapid[body_id] + if mocap_idx >= 0 and mocap_idx < model.nmocap: + data.mocap_pos[mocap_idx] = midpoint + data.mocap_quat[mocap_idx] = quat + + +def update_tracker_sites( + model: mujoco.MjModel, + data: mujoco.MjData, + hand_structure: HandStructure, + target_joint_types: tuple[str, ...], + ik_solver: ORCAHandIKRetargeting, +) -> None: + """Update mocap body positions for tracker sites based on hand structure.""" + landmarks_hand_frame = hand_structure_to_landmarks(hand_structure) + + t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body") + p_palm = t_palm.translation() + r_palm = t_palm.rotation().as_matrix() + + wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm + p_wrist = p_palm + wrist_offset_world + + scale = ik_solver.config.scale_factor + coord_transform = ik_solver.config.coord_transform + + allowed_types = set(target_joint_types) + for finger_name, mp_mapping in MP_LANDMARK_INDICES.items(): + for joint_type, mp_idx in mp_mapping.items(): + if joint_type not in allowed_types: + continue + + landmark_pos_hand_frame = landmarks_hand_frame[mp_idx] + scaled_vec = landmark_pos_hand_frame * scale + target_pos_world = p_wrist + scaled_vec + + rel_vec = target_pos_world - p_wrist + transformed_vec = coord_transform @ rel_vec + final_pos = p_wrist + transformed_vec + + mocap_body_name = f"tracker_{finger_name}_{joint_type}" + body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, mocap_body_name) + if body_id >= 0: + mocap_idx = model.body_mocapid[body_id] + if mocap_idx >= 0 and mocap_idx < model.nmocap: + data.mocap_pos[mocap_idx] = final_pos + + # Update wrist tracker position + wrist_body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "tracker_wrist") + if wrist_body_id >= 0: + wrist_mocap_idx = model.body_mocapid[wrist_body_id] + if wrist_mocap_idx >= 0 and wrist_mocap_idx < model.nmocap: + data.mocap_pos[wrist_mocap_idx] = p_wrist + + # Update skeleton lines after updating tracker sites + update_skeleton_lines(model, data, hand_structure, target_joint_types, ik_solver) + + +def record_camera_video(cap: cv2.VideoCapture, output_path: Path, fps: float = 30.0) -> bool: + """Record video from camera until 'q' is pressed. + + Returns True if recording was successful, False otherwise. + """ + print(f"\n{'='*70}") + print("VIDEO RECORDING") + print(f"{'='*70}") + print("Press 'r' or 's' to START recording") + print("Press 'q' to STOP recording and save") + print(f"{'='*70}\n") + + # Set camera FPS if possible + cap.set(cv2.CAP_PROP_FPS, fps) + + # Wait for start key + recording = False + out = None + frame_count = 0 + start_time = None + frame_time = 1.0 / fps # Time per frame in seconds + target_time = time.time() + frame_time + + while True: + ret, frame = cap.read() + if not ret: + print("Failed to read from camera") + if out is not None: + out.release() + cv2.destroyAllWindows() + return False + + display_frame = frame.copy() + if not recording: + cv2.putText( + display_frame, + "Press 'r' or 's' to START recording", + (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, + 1, + (0, 255, 0), + 2, + ) + else: + elapsed = time.time() - start_time + cv2.putText( + display_frame, + f"RECORDING... {elapsed:.1f}s ({frame_count} frames) - Press 'q' to STOP", + (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, + 0.7, + (0, 0, 255), + 2, + ) + + cv2.imshow("Camera Feed - Recording", display_frame) + key = cv2.waitKey(1) & 0xFF + + if key == ord("q") and recording: + break + elif (key == ord("r") or key == ord("s")) and not recording: + recording = True + # Get frame dimensions + height, width = frame.shape[:2] + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height)) + if not out.isOpened(): + print(f"Error: Failed to open video writer for {output_path}") + cv2.destroyAllWindows() + return False + start_time = time.time() + print(f"Recording started at {fps} FPS. Saving to: {output_path}") + + if recording and out is not None: + # Write frame and control timing + out.write(frame) + frame_count += 1 + + # Sleep to maintain target FPS + sleep_time = max(0, target_time - time.time()) + if sleep_time > 0: + time.sleep(sleep_time) + target_time += frame_time + + + if out is not None: + out.release() + elapsed = time.time() - start_time if start_time else 0 + print(f"Recording stopped. Saved {frame_count} frames ({elapsed:.2f}s) to {output_path}") + print(f"Actual FPS: {frame_count / elapsed:.2f} (target: {fps:.2f})") + + cv2.destroyAllWindows() + return recording + + +def process_video_with_hamer( + video_path: Path, + tracker: HaMeRTracker, + ik_solver: ORCAHandIKRetargeting, + model: mujoco.MjModel, + data: mujoco.MjData, + target_joint_types: tuple[str, ...], +) -> list[tuple[np.ndarray, Optional[HandStructure]]]: + """Process video with HAMER and return frames with hand structures.""" + print(f"\n{'='*70}") + print("PROCESSING VIDEO WITH HAMER") + print(f"{'='*70}") + print(f"Processing: {video_path}") + + cap = cv2.VideoCapture(str(video_path)) + if not cap.isOpened(): + raise RuntimeError(f"Failed to open video: {video_path}") + + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + print(f"Video: {frame_count} frames at {fps} FPS") + + processed_frames = [] + timestamp = 0.0 + dt = 1.0 / fps if fps > 0 else 1.0 / 30.0 + + frame_idx = 0 + while True: + ret, frame = cap.read() + if not ret: + break + + # Use previous frame's solution as warm start for better IK convergence + # Only reset on first frame + if frame_idx == 0: + mujoco.mj_resetData(model, data) + + timestamp = frame_idx * dt + hand_structures = tracker.detect_hands(frame, timestamp=timestamp) + + hand_structure = hand_structures[0] if hand_structures else None + + if hand_structure: + # Update IK solver with current state (warm start from previous frame) + mujoco.mj_forward(model, data) + ik_solver.configuration.update(data.qpos) + + # Solve IK + qpos = ik_solver.solve(hand_structure) + if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)): + data.qpos[:] = qpos + data.qvel[:] = 0 + + # Update tracker site positions + update_tracker_sites(model, data, hand_structure, target_joint_types, ik_solver) + + # Forward kinematics + mujoco.mj_forward(model, data) + + processed_frames.append((frame.copy(), hand_structure)) + frame_idx += 1 + + if frame_idx % 10 == 0: + print(f"Processed {frame_idx}/{frame_count} frames...") + + cap.release() + print(f"Processing complete. Processed {len(processed_frames)} frames.") + return processed_frames + + +def record_mujoco_visualization( + processed_frames: list[tuple[np.ndarray, Optional[HandStructure]]], + model: mujoco.MjModel, + data: mujoco.MjData, + output_path: Path, + fps: float = 30.0, + width: int = 1920, + height: int = 1080, + ik_solver: Optional[ORCAHandIKRetargeting] = None, + target_joint_types: tuple[str, ...] = ("tip",), +) -> None: + """Record MuJoCo visualization from processed frames.""" + print(f"\n{'='*70}") + print("RECORDING MUJOCO VISUALIZATION") + print(f"{'='*70}") + print(f"Recording {len(processed_frames)} frames to: {output_path}") + + # Create renderer + # The model should have framebuffer settings in MJCF for larger resolutions + try: + renderer = mujoco.Renderer(model, height=height, width=width, max_geom=10000) + except ValueError as e: + # Fallback to smaller resolution if framebuffer is too small + print(f"Warning: {e}") + print("Falling back to 640x480 resolution") + width = 640 + height = 480 + renderer = mujoco.Renderer(model, height=height, width=width, max_geom=10000) + + # Setup camera + cam = mujoco.MjvCamera() + cam.type = mujoco.mjtCamera.mjCAMERA_FREE + cam.distance = 0.6 # Further back to fit entire hand + cam.azimuth = -20 + cam.elevation = 35 + cam.lookat[:] = [0.04, 0.0, 0.08] # Look at palm area + + # Setup options - enable transparency and joint visualization for skeleton overlay + option = mujoco.MjvOption() + option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = False + option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = False + option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = True # Make meshes transparent + option.flags[mujoco.mjtVisFlag.mjVIS_JOINT] = True # Show joints for skeleton overlay + + # Create video writer + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height)) + + # Process each frame + for frame_idx, (frame, hand_structure) in enumerate(processed_frames): + # Only reset on first frame + if frame_idx == 0: + mujoco.mj_resetData(model, data) + + if hand_structure is not None and ik_solver is not None: + # Update IK solver configuration + mujoco.mj_forward(model, data) + ik_solver.configuration.update(data.qpos) + + # Solve IK + qpos = ik_solver.solve(hand_structure) + if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)): + data.qpos[:] = qpos + data.qvel[:] = 0 + + # Update tracker site positions + update_tracker_sites(model, data, hand_structure, target_joint_types, ik_solver) + + # Forward kinematics + mujoco.mj_forward(model, data) + + # Update scene with current state + renderer.update_scene(data, camera=cam, scene_option=option) + + # Render to numpy array (returns RGB, shape: (height, width, 3)) + rgb_frame = renderer.render() + + # Convert RGB to BGR for OpenCV + # Note: MuJoCo Renderer already returns correct orientation, no need to flip + bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR) + + out.write(bgr_frame) + + if (frame_idx + 1) % 10 == 0: + print(f"Rendered {frame_idx + 1}/{len(processed_frames)} frames...") + + out.release() + renderer.close() + print(f"MuJoCo visualization recording complete. Saved to: {output_path}") + + +def concatenate_videos(video1_path: Path, video2_path: Path, output_path: Path) -> None: + """Concatenate two videos side by side.""" + print(f"\n{'='*70}") + print("CONCATENATING VIDEOS") + print(f"{'='*70}") + print(f"Concatenating {video1_path} and {video2_path}") + print(f"Output: {output_path}") + + cap1 = cv2.VideoCapture(str(video1_path)) + cap2 = cv2.VideoCapture(str(video2_path)) + + if not cap1.isOpened() or not cap2.isOpened(): + raise RuntimeError("Failed to open one or both videos") + + # Get properties + fps1 = cap1.get(cv2.CAP_PROP_FPS) + fps2 = cap2.get(cv2.CAP_PROP_FPS) + fps = min(fps1, fps2) if fps1 > 0 and fps2 > 0 else 30.0 + + width1 = int(cap1.get(cv2.CAP_PROP_FRAME_WIDTH)) + height1 = int(cap1.get(cv2.CAP_PROP_FRAME_HEIGHT)) + width2 = int(cap2.get(cv2.CAP_PROP_FRAME_WIDTH)) + height2 = int(cap2.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + # Use the maximum height and sum of widths for side-by-side + max_height = max(height1, height2) + total_width = width1 + width2 + + # Resize videos to same height if needed + if height1 != height2: + scale1 = max_height / height1 + scale2 = max_height / height2 + width1 = int(width1 * scale1) + width2 = int(width2 * scale2) + total_width = width1 + width2 + + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + out = cv2.VideoWriter(str(output_path), fourcc, fps, (total_width, max_height)) + + frame_count = 0 + while True: + ret1, frame1 = cap1.read() + ret2, frame2 = cap2.read() + + if not ret1 or not ret2: + break + + # Resize if needed + if height1 != max_height: + frame1 = cv2.resize(frame1, (width1, max_height)) + if height2 != max_height: + frame2 = cv2.resize(frame2, (width2, max_height)) + + # Concatenate side by side + concatenated = np.hstack([frame1, frame2]) + out.write(concatenated) + + frame_count += 1 + if frame_count % 10 == 0: + print(f"Concatenated {frame_count} frames...") + + cap1.release() + cap2.release() + out.release() + print(f"Concatenation complete. Saved {frame_count} frames to {output_path}") + + +def main() -> None: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Record video, process with HAMER, visualize in MuJoCo, and concatenate", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("--camera", type=int, default=0, help="Camera device index") + parser.add_argument( + "--model", + type=str, + default="models/orca_hand_fixed.mjcf", + help="Path to MuJoCo MJCF model file", + ) + parser.add_argument( + "--targets", + type=str, + default="tip", + help="Comma-separated joint targets (tip,ip,pip,mcp)", + ) + parser.add_argument( + "--scale", + type=float, + default=1.0, + help="Hand scale factor (robot/human)", + ) + parser.add_argument( + "--fps", + type=float, + default=30.0, + help="Video FPS", + ) + parser.add_argument( + "--output-dir", + type=str, + default="recordings", + help="Output directory for videos", + ) + parser.add_argument( + "--video-path", + type=str, + default=None, + help="Path to input video file (if provided, skips camera recording and uses this video)", + ) + args = parser.parse_args() + + # Parse target joint types + raw_targets = [part.strip().lower() for part in args.targets.split(",")] + target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt)) + if not target_joints: + target_joints = ("tip",) + supported_targets = {"tip", "ip", "pip", "mcp"} + invalid = [jt for jt in target_joints if jt not in supported_targets] + if invalid: + parser.error(f"Unsupported target joint types: {', '.join(invalid)}") + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Generate output filenames + timestamp = int(time.time()) + if args.video_path: + # Use input video name as base for output files + input_video_name = Path(args.video_path).stem + camera_video_path = Path(args.video_path) # Use input video directly + mujoco_video_path = output_dir / f"mujoco_{input_video_name}_{timestamp}.mp4" + final_video_path = output_dir / f"concatenated_{input_video_name}_{timestamp}.mp4" + else: + camera_video_path = output_dir / f"camera_recording_{timestamp}.mp4" + mujoco_video_path = output_dir / f"mujoco_recording_{timestamp}.mp4" + final_video_path = output_dir / f"concatenated_{timestamp}.mp4" + + # Load model + model_file = Path(args.model) + if not model_file.is_absolute(): + model_file = Path(__file__).parent.parent / args.model + if not model_file.exists(): + raise FileNotFoundError(f"Model file not found: {model_file}") + + print(f"Loading model from {model_file}") + xml_string = inject_visualization_bodies(model_file, target_joints) + model = mujoco.MjModel.from_xml_string(xml_string) + data = mujoco.MjData(model) + + # Initialize tracker and IK solver + tracker = HaMeRTracker() + ik_config = ORCAHandIKConfig( + scale_factor=args.scale, + target_joint_types=target_joints, + lm_damping=0.5, + ik_iterations=15, + ) + ik_solver = ORCAHandIKRetargeting(model, config=ik_config) + + # Verify tip sites exist in the loaded model + if "tip" in target_joints: + print("\nVerifying tip sites in loaded model:") + for finger in ["thumb", "index", "middle", "ring", "pinky"]: + site_name = f"right_{finger}_tip_site" + site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name) + if site_id >= 0: + site_pos = data.site(site_id).xpos.copy() + print(f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]") + else: + print(f" ✗ {site_name}: NOT FOUND in model!") + + # Step 1: Record camera video or use provided video file + if args.video_path: + camera_video_path = Path(args.video_path) + if not camera_video_path.exists(): + raise FileNotFoundError(f"Video file not found: {camera_video_path}") + print(f"Using existing video: {camera_video_path}") + else: + cap = cv2.VideoCapture(args.camera) + if not cap.isOpened(): + raise RuntimeError(f"Failed to open camera {args.camera}") + + if not record_camera_video(cap, camera_video_path, args.fps): + print("Recording cancelled or failed.") + cap.release() + return + + cap.release() + + # Step 2: Process video with HAMER + processed_frames = process_video_with_hamer( + camera_video_path, + tracker, + ik_solver, + model, + data, + target_joints, + ) + + # Get video FPS (use input video FPS if provided, otherwise use args.fps) + if args.video_path: + cap = cv2.VideoCapture(str(camera_video_path)) + video_fps = cap.get(cv2.CAP_PROP_FPS) + cap.release() + if video_fps > 0: + fps = video_fps + else: + fps = args.fps + else: + fps = args.fps + + # Step 3: Record MuJoCo visualization + record_mujoco_visualization( + processed_frames, + model, + data, + mujoco_video_path, + fps, + ik_solver=ik_solver, + target_joint_types=target_joints, + ) + + # Step 4: Concatenate videos + concatenate_videos(camera_video_path, mujoco_video_path, final_video_path) + + print(f"\n{'='*70}") + print("ALL DONE!") + print(f"{'='*70}") + print(f"Camera video: {camera_video_path}") + print(f"MuJoCo video: {mujoco_video_path}") + print(f"Concatenated video: {final_video_path}") + print(f"{'='*70}\n") + + +if __name__ == "__main__": + main() diff --git a/scripts/visualize_tracker_ik_sites.py b/scripts/visualize_tracker_ik_sites.py index d7c1acf..ac99398 100644 --- a/scripts/visualize_tracker_ik_sites.py +++ b/scripts/visualize_tracker_ik_sites.py @@ -367,8 +367,8 @@ def main() -> None: parser.add_argument( "--targets", type=str, - default="tip, ip", - help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip", + default="tip", + help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip", ) parser.add_argument( "--scale", @@ -381,6 +381,13 @@ def main() -> None: action="store_true", help="Show camera feed with hand tracking overlay", ) + parser.add_argument( + "--joint-smoothing", + type=float, + default=0.7, + help="Smoothing factor for joint positions (0.0-1.0). \ + Lower values = more smoothing, higher = less smoothing. Default: 0.7", + ) args = parser.parse_args() # Parse target joint types @@ -427,7 +434,8 @@ def main() -> None: ik_config = ORCAHandIKConfig( scale_factor=args.scale, target_joint_types=target_joints, - lm_damping=0.5 + lm_damping=0.5, + ik_iterations=15, ) ik_solver = ORCAHandIKRetargeting(model, config=ik_config) @@ -450,8 +458,15 @@ def main() -> None: print("\nPress 'q' in MuJoCo viewer to quit") print("=" * 70 + "\n") + # Validate joint smoothing value + if not (0.0 < args.joint_smoothing <= 1.0): + parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)") + start_time = time.time() + # Initialize joint position smoothing state + smoothed_qpos = data.qpos.copy() + # Set up multiprocessing for display window if requested display_process: mp.Process | None = None frame_queue: mp.Queue | None = None @@ -491,14 +506,20 @@ def main() -> None: if hand_structures: hand = hand_structures[0] - # Update IK solver configuration + # Update IK solver configuration with current smoothed state + mujoco.mj_forward(model, data) + data.qpos[:] = smoothed_qpos mujoco.mj_forward(model, data) ik_solver.configuration.update(data.qpos) # Solve IK qpos = ik_solver.solve(hand) + + # Apply joint position smoothing if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)): - data.qpos[:] = qpos + # Exponential moving average: smoothed = joint_smoothing * new + (1 - joint_smoothing) * old + smoothed_qpos = args.joint_smoothing * qpos + (1.0 - args.joint_smoothing) * smoothed_qpos + data.qpos[:] = smoothed_qpos data.qvel[:] = 0 # Update tracker site positions From a84de55a38d5c6511c519e06852e1b3b3571e36e Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 3 Feb 2026 05:14:00 -0500 Subject: [PATCH 15/17] ignore recordings --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 58be3ff..8e2662d 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,5 @@ out*/ output/ *.npz *.pkl +recordings/ From 6164b8cfbe8dc0b0d7440b92b5d804bbf8ed2e26 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Tue, 3 Feb 2026 14:47:01 -0500 Subject: [PATCH 16/17] rm autoscale --- examples/live_demo_ik.py | 10 --- handpose/ik_retargeting.py | 136 ------------------------------------- 2 files changed, 146 deletions(-) diff --git a/examples/live_demo_ik.py b/examples/live_demo_ik.py index 7c8fba9..be9a7a9 100644 --- a/examples/live_demo_ik.py +++ b/examples/live_demo_ik.py @@ -363,16 +363,6 @@ async def key_handler(key: str) -> None: if hand_structures: structure = hand_structures[0] - # --- AUTO-SCALE (once mode) --- - if auto_scale_once and not scale_computed_once and ik_solver.config.auto_scale: - # Temporarily disable continuous auto-scaling - ik_solver.config.auto_scale = False - # Compute scale factor once - computed_scale = ik_solver.compute_auto_scale_factor(structure, use_neutral_robot_pose=True) - ik_solver.config.scale_factor = computed_scale - scale_computed_once = True - print(f"[Auto-Scale] Computed scale factor: {computed_scale:.3f}") - # --- IK SOLVE --- # 1. Update the configuration object with current robot state mujoco.mj_forward(model, data) diff --git a/handpose/ik_retargeting.py b/handpose/ik_retargeting.py index 089ad6e..170b2c8 100644 --- a/handpose/ik_retargeting.py +++ b/handpose/ik_retargeting.py @@ -131,13 +131,6 @@ class ORCAHandIKConfig: coord_transform: np.ndarray | None = None target_joint_types: tuple[str, ...] = ("tip",) - # Auto-scaling options - auto_scale: bool = False # If True, automatically computes scale_factor from fingertip distances - auto_scale_update_rate: float = ( - 0.1 # Exponential smoothing factor for auto-scaling (0.0 = no smoothing, 1.0 = instant) - ) - auto_scale_use_neutral_pose: bool = True # Use neutral robot pose for scale computation - def __post_init__(self) -> None: """Initialize default values if None.""" if self.wrist_offset_palm is None: @@ -176,10 +169,6 @@ def __init__( self.model = model self.configuration = mink.Configuration(model) - # Initialize config - if config is None: - config = ORCAHandIKConfig.default_config() - self.config = config # Identify the qpos indices for the finger joints we want to control @@ -233,117 +222,6 @@ def __init__( ) self.limits.append(collision_limit) - def compute_auto_scale_factor(self, hand_structure: HandStructure, use_neutral_robot_pose: bool = True) -> float: - """Compute automatic scale factor based on fingertip distances. - - Compares the distance from wrist to fingertips in the human hand - with the distance from wrist to fingertips in the ORCA hand model. - - Args: - hand_structure: The tracked human hand structure - use_neutral_robot_pose: If True, uses neutral robot pose (q=0). - If False, uses current robot configuration. - - Returns: - Scale factor (robot_fingertip_distance / human_fingertip_distance) - """ - # Get human hand fingertip positions relative to wrist - wrist_pos = hand_structure.wrist_position - human_fingertips = { - "thumb": hand_structure.thumb.tip, - "index": hand_structure.index.tip, - "middle": hand_structure.middle.tip, - "ring": hand_structure.ring.tip, - "pinky": hand_structure.pinky.tip, - } - - # Compute average distance from wrist to fingertips in human hand - human_distances = [] - for finger, tip_pos in human_fingertips.items(): - dist = np.linalg.norm(tip_pos - wrist_pos) - if dist > 0.01: # Filter out invalid/too-small distances - human_distances.append(dist) - - if len(human_distances) == 0: - return self.config.scale_factor # Fallback to current scale - - avg_human_distance = np.mean(human_distances) - - # Get robot hand fingertip positions - if use_neutral_robot_pose: - # Save current configuration - saved_q = self.configuration.q.copy() - # Set to neutral pose - self.configuration.q[:] = 0.0 - self.configuration.update() - - # Get wrist position in robot model - t_palm = self.configuration.get_transform_frame_to_world("right_palm", "body") - p_palm = t_palm.translation() - r_palm = t_palm.rotation().as_matrix() - wrist_offset_world = r_palm @ self.config.wrist_offset_palm - p_wrist_robot = p_palm + wrist_offset_world - - # Get fingertip positions from robot model using mink Configuration - robot_fingertips = {} - for finger_name in ["thumb", "index", "middle", "ring", "pinky"]: - if finger_name not in self.tasks: - continue - finger_tasks = self.tasks[finger_name] - if "tip" not in finger_tasks: - continue - - # Get the frame name and type - frame_type, frame_name = FINGER_TARGET_BODIES[finger_name]["tip"] - - # Get transform from mink Configuration - try: - if frame_type == "body": - t_tip = self.configuration.get_transform_frame_to_world(frame_name, "body") - else: # site - t_tip = self.configuration.get_transform_frame_to_world(frame_name, "site") - tip_pos = t_tip.translation() - robot_fingertips[finger_name] = tip_pos - except Exception: - # Fallback: try to get from MuJoCo model directly - obj_type = mujoco.mjtObj.mjOBJ_BODY if frame_type == "body" else mujoco.mjtObj.mjOBJ_SITE - frame_id = mujoco.mj_name2id(self.model, obj_type, frame_name) - if frame_id >= 0: - # Need to forward kinematics - use data - data = mujoco.MjData(self.model) - data.qpos[:] = self.configuration.q - mujoco.mj_forward(self.model, data) - if obj_type == mujoco.mjtObj.mjOBJ_SITE: - tip_pos = data.site(frame_id).xpos.copy() - else: - tip_pos = data.body(frame_id).xpos.copy() - robot_fingertips[finger_name] = tip_pos - - # Restore configuration if we changed it - if use_neutral_robot_pose: - self.configuration.q[:] = saved_q - self.configuration.update() - - # Compute average distance from wrist to fingertips in robot hand - robot_distances = [] - for finger, tip_pos in robot_fingertips.items(): - dist = np.linalg.norm(tip_pos - p_wrist_robot) - if dist > 0.01: # Filter out invalid/too-small distances - robot_distances.append(dist) - - if len(robot_distances) == 0: - return self.config.scale_factor # Fallback to current scale - - avg_robot_distance = np.mean(robot_distances) - - # Compute scale factor - if avg_human_distance > 1e-6: - scale_factor = avg_robot_distance / avg_human_distance - else: - return self.config.scale_factor # Fallback - - return scale_factor - def _hand_structure_to_landmarks(self, hand_structure: HandStructure) -> np.ndarray: """Convert HandStructure to 21x3 landmarks array (MediaPipe format). @@ -464,20 +342,6 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray: Returns: The full qpos array for the robot. """ - # Auto-scale if enabled - if self.config.auto_scale and isinstance(hand_input, HandStructure): - new_scale = self.compute_auto_scale_factor( - hand_input, use_neutral_robot_pose=self.config.auto_scale_use_neutral_pose - ) - # Apply exponential smoothing to avoid jitter - if self.config.auto_scale_update_rate > 0.0: - self.config.scale_factor = ( - self.config.auto_scale_update_rate * new_scale - + (1.0 - self.config.auto_scale_update_rate) * self.config.scale_factor - ) - else: - self.config.scale_factor = new_scale - targets = self.compute_target_positions(hand_input) # Use configurable parameters From e256cc391aa6ed960ad7c0a71abedaa3b10a8598 Mon Sep 17 00:00:00 2001 From: Wesley Maa Date: Mon, 6 Apr 2026 00:43:12 -0400 Subject: [PATCH 17/17] lint --- handpose/ik_retargeting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handpose/ik_retargeting.py b/handpose/ik_retargeting.py index 170b2c8..72453a2 100644 --- a/handpose/ik_retargeting.py +++ b/handpose/ik_retargeting.py @@ -5,7 +5,7 @@ import mink import mujoco import numpy as np -from mink.limits import CollisionAvoidanceLimit, ConfigurationLimit +from mink.limits import CollisionAvoidanceLimit, ConfigurationLimit, Limit from handpose.tracker.base import HandStructure @@ -211,7 +211,7 @@ def __init__( ) # Initialize collision avoidance limits if enabled - self.limits = [ConfigurationLimit(model=model)] + self.limits: list[Limit] = [ConfigurationLimit(model=model)] if self.config.use_collision_avoidance and self.config.collision_geom_pairs: collision_limit = CollisionAvoidanceLimit( model=model,