diff --git a/.gitignore b/.gitignore index e52d08ba32..545a889dec 100644 --- a/.gitignore +++ b/.gitignore @@ -63,4 +63,8 @@ yolo11n.pt /.mypy_cache* *mobileclip* + /results + +# Teleop SSL certificates +assets/teleop_certs/ diff --git a/dimos/msgs/geometry_msgs/Pose.py b/dimos/msgs/geometry_msgs/Pose.py index bf6a821cc8..cb2d5143d9 100644 --- a/dimos/msgs/geometry_msgs/Pose.py +++ b/dimos/msgs/geometry_msgs/Pose.py @@ -222,6 +222,19 @@ def __add__(self, other: Pose | PoseConvertable | LCMTransform | Transform) -> P return Pose(new_position, new_orientation) + def __sub__(self, other: Pose) -> Pose: + """Compute the delta pose: self - other. + + For position: simple subtraction. + For orientation: delta_quat = self.orientation * inverse(other.orientation) + + Returns: + A new Pose representing the delta transformation + """ + delta_position = self.position - other.position + delta_orientation = self.orientation * other.orientation.inverse() + return Pose(delta_position, delta_orientation) + @classmethod def from_ros_msg(cls, ros_msg: ROSPose) -> Pose: """Create a Pose from a ROS geometry_msgs/Pose message. diff --git a/dimos/msgs/std_msgs/Float32.py b/dimos/msgs/std_msgs/Float32.py new file mode 100644 index 0000000000..afd9bd6163 --- /dev/null +++ b/dimos/msgs/std_msgs/Float32.py @@ -0,0 +1,29 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Float32 message type.""" + +from typing import ClassVar + +from dimos_lcm.std_msgs import Float32 as LCMFloat32 + + +class Float32(LCMFloat32): # type: ignore[misc] + """ROS-compatible Float32 message.""" + + msg_name: ClassVar[str] = "std_msgs.Float32" + + def __init__(self, data: float = 0.0) -> None: + """Initialize Float32 with data value.""" + self.data = data diff --git a/dimos/msgs/std_msgs/__init__.py b/dimos/msgs/std_msgs/__init__.py index 9002b8c4ef..1404d547ee 100644 --- a/dimos/msgs/std_msgs/__init__.py +++ b/dimos/msgs/std_msgs/__init__.py @@ -13,8 +13,9 @@ # limitations under the License. from .Bool import Bool +from .Float32 import Float32 from .Header import Header from .Int8 import Int8 from .Int32 import Int32 -__all__ = ["Bool", "Header", "Int8", "Int32"] +__all__ = ["Bool", "Float32", "Header", "Int8", "Int32"] diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index c241485472..914eade74a 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -54,6 +54,8 @@ "demo-google-maps-skill": "dimos.agents.skills.demo_google_maps_skill:demo_google_maps_skill", "demo-object-scene-registration": "dimos.perception.demo_object_scene_registration:demo_object_scene_registration", "demo-error-on-name-conflicts": "dimos.robot.unitree_webrtc.demo_error_on_name_conflicts:blueprint", + # Teleop blueprints + "quest3-teleop": "dimos.teleop.teleop_blueprints:quest3_teleop", } diff --git a/dimos/teleop/README.md b/dimos/teleop/README.md new file mode 100644 index 0000000000..cf126afc78 --- /dev/null +++ b/dimos/teleop/README.md @@ -0,0 +1,113 @@ +# Teleoperation + +VR teleoperation system for controlling robots with Meta Quest controllers. + +## Folder Structure + +``` +teleop/ +├── __init__.py # Exports quest3_teleop blueprint +├── teleop_blueprints.py # Pre-built system configurations +├── devices/ # Teleop input modules +│ ├── base_teleop_module.py # BaseTeleopModule (calibration, deltas, transforms) +│ └── vr_teleop_module.py # VRTeleopModule (LCM inputs) +└── web/ # Quest WebXR interface + ├── teleop_server.ts # Deno WebSocket-LCM bridge + ├── static/index.html # WebXR client + └── certs/ # SSL certificates +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Quest 3 Headset │ +│ ┌─────────────┐ │ +│ │ WebXR App │ ──── Controller poses + triggers ────┐ │ +│ └─────────────┘ │ │ +└───────────────────────────────────────────────────────│─────────────┘ + │ WebSocket + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Deno Bridge (teleop_server.ts) │ +│ WebSocket ←→ LCM packet forwarding │ +└───────────────────────────────────────────────────────│─────────────┘ + │ LCM + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ VRTeleopModule │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Calibrate │ → │ Compute Delta│ → │ Transform │ │ +│ │ (X button) │ │ curr - init │ │ & Publish │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +└───────────────────────────────────────────────────────│─────────────┘ + │ + ┌───────────────────────────────────┴───────────┐ + ▼ ▼ + controller_delta_0/1 controller_delta_2/3 + (PoseStamped) (TwistStamped) + target = init + Δ vel = scale(Δ) +``` + +## Quick Start + +```python +from dimos.teleop import quest3_teleop + +coordinator = quest3_teleop.build() +coordinator.loop() +``` + +Then on Quest 3: Open `https://:8443`, press X to calibrate and start. + +## Output Types Configuration + +The `output_types` parameter determines how each controller input is mapped: + +| Output Type | Indices | Use Case | +|-------------|---------|----------| +| `PoseStamped` | 0, 1 | Manipulators, end-effector control | +| `TwistStamped` | 2, 3 | Locomotion, velocity control | + +The system auto-computes active indices from output types: +- `[PoseStamped, PoseStamped]` → indices `[0, 1]` (dual arm) +- `[TwistStamped, TwistStamped]` → indices `[2, 3]` (dual locomotion) +- `[PoseStamped, TwistStamped]` → indices `[0, 2]` (arm + quadruped) + +## Custom Setup + +```python +from dimos.teleop.devices import vr_teleop_module +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped +from dimos.core.blueprints import autoconnect + +# Dual arm setup (both controllers → PoseStamped) +dual_arm = autoconnect( + vr_teleop_module( + output_types=[PoseStamped, PoseStamped], + input_labels=["left_arm", "right_arm"], + ), +) + +# Arm + Quadruped (left → PoseStamped index 0, right → TwistStamped index 2) +arm_and_quad = autoconnect( + vr_teleop_module( + output_types=[PoseStamped, TwistStamped], + input_labels=["left_arm", "right_quad"], + ), +) + +# With RPC for initial robot pose (arm calibration) +arm_with_rpc = autoconnect( + vr_teleop_module( + output_types=[PoseStamped, PoseStamped], + input_labels=["left_arm", "right_arm"], + robot_pose_rpc_methods=["LeftArm.get_ee_pose", "RightArm.get_ee_pose"], + ), +) +``` + +## Submodules + +- [devices/](devices/) - Teleop input modules +- [web/](web/) - Quest WebXR client and Deno server diff --git a/dimos/teleop/__init__.py b/dimos/teleop/__init__.py new file mode 100644 index 0000000000..d01ad6094c --- /dev/null +++ b/dimos/teleop/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Teleoperation module for dimos.""" + +from dimos.teleop.teleop_blueprints import quest3_teleop + +__all__ = [ + "quest3_teleop", +] diff --git a/dimos/teleop/devices/README.md b/dimos/teleop/devices/README.md new file mode 100644 index 0000000000..d82d18c27d --- /dev/null +++ b/dimos/teleop/devices/README.md @@ -0,0 +1,104 @@ +# Teleop Devices + +Input device modules that capture tracking data, compute deltas, and transform to robot commands. + +## Modules + +### BaseTeleopModule + +Abstract base class providing: +- Multi-controller calibration (capture initial poses) +- Delta computation (current - initial) +- Transform to robot commands (PoseStamped or TwistStamped based on `output_types`) +- Publishing to `controller_delta_*` outputs +- Optional RPC calls for initial robot pose +- Rerun visualization + +### VRTeleopModule + +VR-specific implementation that: +- Subscribes to LCM Transform messages from Deno bridge +- Applies coordinate frame transformation (WebXR → Robot) +- Transforms deltas to robot commands based on output type +- Runs control loop at 50Hz + +## Data Flow + +``` +LCM Input (Transform) + │ + ▼ +┌──────────────────┐ +│ Coordinate Frame │ WebXR: X=right, Y=up, Z=back +│ Transformation │ Robot: X=forward, Y=left, Z=up +└──────────────────┘ + │ + ▼ +┌──────────────────┐ +│ Calibration │ On X button: capture initial controller pose +│ │ (optional) RPC call for initial robot pose +└──────────────────┘ + │ + ▼ +┌──────────────────┐ +│ Delta Compute │ delta = current_controller - initial_controller +└──────────────────┘ + │ + ▼ +┌──────────────────┐ +│ Transform │ PoseStamped: target = initial_robot + delta +│ │ TwistStamped: velocity = scale(delta) +└──────────────────┘ + │ + ▼ + LCM Output (controller_delta_0/1/2/3) +``` + +## Configuration + +```python +from dimos.teleop.devices import VRTeleopConfig +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped + +config = VRTeleopConfig( + # Output types determine active indices: + # PoseStamped → indices 0,1 | TwistStamped → indices 2,3 + output_types=[PoseStamped, TwistStamped], + input_labels=["left_vr", "right_vr"], + + # Optional RPC methods for initial robot pose (per output) + robot_pose_rpc_methods=["ArmDriver.get_ee_pose", None], + + # Visualization + visualize_in_rerun=True, + + # Control + control_loop_hz=50.0, + + # Transform settings + linear_scale=1.0, + angular_scale=1.0, + max_linear_velocity=0.5, + max_angular_velocity=1.0, + gripper_threshold=0.5, +) +``` + +## Output Indices + +The `output_types` parameter auto-computes active indices: + +| Configuration | Active Indices | Outputs | +|--------------|----------------|---------| +| `[PoseStamped, PoseStamped]` | `[0, 1]` | Dual arm | +| `[TwistStamped, TwistStamped]` | `[2, 3]` | Dual locomotion | +| `[PoseStamped, TwistStamped]` | `[0, 2]` | Arm + quadruped | + +## Adding New Devices + +1. Inherit from `BaseTeleopModule` +2. Override `start()` to set up input subscriptions +3. Transform input to 4x4 pose matrix in robot frame +4. Call `compute_deltas(poses, trigger_values)` to get delta poses +5. Use `transform_delta()` from utils to convert to command +6. Call `publish_command(index, command, aux_command)` to publish diff --git a/dimos/teleop/devices/__init__.py b/dimos/teleop/devices/__init__.py new file mode 100644 index 0000000000..c89dea46d5 --- /dev/null +++ b/dimos/teleop/devices/__init__.py @@ -0,0 +1,35 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Teleoperation devices.""" + +from dimos.teleop.devices.base_teleop_module import ( + BaseTeleopConfig, + BaseTeleopModule, + TeleopStatusKey, +) +from dimos.teleop.devices.vr_teleop_module import ( + VRTeleopConfig, + VRTeleopModule, + vr_teleop_module, +) + +__all__ = [ + "BaseTeleopConfig", + "BaseTeleopModule", + "TeleopStatusKey", + "VRTeleopConfig", + "VRTeleopModule", + "vr_teleop_module", +] diff --git a/dimos/teleop/devices/base_teleop_module.py b/dimos/teleop/devices/base_teleop_module.py new file mode 100644 index 0000000000..4c6a10631e --- /dev/null +++ b/dimos/teleop/devices/base_teleop_module.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Base Teleoperation Module. + +Provides calibration, delta computation, and command publishing. +Subclasses implement device-specific input handling. +""" + +from dataclasses import dataclass, field +from enum import Enum +from typing import TYPE_CHECKING, Any, TypeVar + +from dimos.core import Module, Out, rpc +from dimos.core.module import ModuleConfig +from dimos.msgs.geometry_msgs import Pose, PoseStamped, TwistStamped +from dimos.msgs.std_msgs import Bool +from dimos.teleop.utils.teleop_visualization import ( + init_rerun_visualization, + visualize_pose, + visualize_trigger_value, +) +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +@dataclass +class BaseTeleopConfig(ModuleConfig): + """Base configuration for teleoperation modules. + + output_types determines active indices: PoseStamped→0,1, TwistStamped→2,3 + """ + + output_types: list[type] = field(default_factory=lambda: [PoseStamped, PoseStamped]) + input_labels: list[str] = field(default_factory=lambda: ["left", "right"]) + robot_pose_rpc_methods: list[str | None] = field(default_factory=list) + + visualize_in_rerun: bool = True + + linear_scale: float = 1.0 + angular_scale: float = 1.0 + max_linear_velocity: float = 0.5 + max_angular_velocity: float = 1.0 + gripper_threshold: float = 0.5 + + +class TeleopStatusKey(str, Enum): + """Status dictionary keys (controller_* entries are indexed by controller number).""" + + IS_CALIBRATED = "is_calibrated" + CONTROLLER_HAS_DATA = "controller_{index}_has_data" + CONTROLLER_TRIGGER_VALUE = "controller_{index}_trigger_value" + CONTROLLER_LABEL = "controller_{index}_label" + + +TeleopConfigT = TypeVar("TeleopConfigT", bound=BaseTeleopConfig) + + +class BaseTeleopModule(Module[TeleopConfigT]): + """Base class for teleoperation modules. + + Handles calibration, delta computation, and command publishing. + Subclasses implement device-specific input handling. + """ + + default_config: type[TeleopConfigT] = BaseTeleopConfig # type: ignore[assignment] + + # Output topics: PoseStamped for arms (0,1), TwistStamped for locomotion (2,3) + controller_delta_0: Out[PoseStamped] + trigger_value_0: Out[Bool] + controller_delta_1: Out[PoseStamped] + trigger_value_1: Out[Bool] + controller_delta_2: Out[TwistStamped] + trigger_value_2: Out[Bool] + controller_delta_3: Out[TwistStamped] + trigger_value_3: Out[Bool] + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + self._is_calibrated = False + self._tracking_msg_count = 0 + self._publish_count = 0 + + if not self.config.output_types: + raise ValueError("output_types cannot be empty") + + if len(self.config.input_labels) != len(self.config.output_types): + raise ValueError( + f"input_labels length ({len(self.config.input_labels)}) must match " + f"output_types length ({len(self.config.output_types)})" + ) + + # Index mapping: output type → available indices + self._output_type_indices: dict[type, list[int]] = { + PoseStamped: [0, 1], + TwistStamped: [2, 3], + } + + self._active_indices = self._compute_active_indices(self.config.output_types) + + self._initial_controller_poses: dict[int, Pose | None] = { + i: None for i in self._active_indices + } + self._initial_robot_poses: dict[int, Pose | None] = {i: None for i in self._active_indices} + self._current_controller_poses: dict[int, Pose | None] = { + i: None for i in self._active_indices + } + self._all_trigger_values: dict[int, float] = {i: 0.0 for i in self._active_indices} + + logger.info( + f"{self.__class__.__name__} initialized: indices={self._active_indices}, " + f"types={[t.__name__ for t in self.config.output_types]}" + ) + + def _get_label(self, index: int) -> str: + """Get label for an active index.""" + try: + i = self._active_indices.index(index) + return self.config.input_labels[i] + except (ValueError, IndexError): + return f"controller_{index}" + + def _compute_active_indices(self, output_types: list[type]) -> list[int]: + """Compute active indices from output types. + Example: + [PoseStamped, TwistStamped] → [0, 2] + [TwistStamped, PoseStamped] → [2, 0] + [PoseStamped, PoseStamped] → [0, 1] + [TwistStamped, TwistStamped] → [2, 3] + """ + indices: list[int] = [] + used_indices: set[int] = set() + + for output_type in output_types: + available = self._output_type_indices.get(output_type, []) + for idx in available: + if idx not in used_indices: + indices.append(idx) + used_indices.add(idx) + break + else: + raise ValueError(f"No available index for output type {output_type.__name__}") + return indices + + @rpc + def start(self) -> None: + super().start() + logger.info(f"Starting {self.__class__.__name__}...") + if self.config.visualize_in_rerun: + init_rerun_visualization() + + @rpc + def stop(self) -> None: + logger.info(f"Stopping {self.__class__.__name__}...") + super().stop() + + @rpc + def calibrate(self) -> bool: + """Capture current controller poses as the zero reference.""" + logger.info("Calibrating controllers...") + + try: + if not self._active_indices: + logger.error("Calibration failed: No controllers are enabled") + return False + + for i, idx in enumerate(self._active_indices): + pose = self._current_controller_poses.get(idx) + if pose is not None: + self._initial_controller_poses[idx] = pose + logger.info(f"Captured controller {self._get_label(idx)} initial: {pose}") + else: + logger.error( + f"Calibration failed: Controller {self._get_label(idx)} data is None" + ) + return False + + # Get initial robot state via RPC if configured (only for PoseStamped) + output_type = self.config.output_types[i] + if output_type == PoseStamped: + if i < len(self.config.robot_pose_rpc_methods): + rpc_method = self.config.robot_pose_rpc_methods[i] + if rpc_method is not None: + self._initial_robot_poses[idx] = self._get_initial_robot_state( + rpc_method + ) + else: + self._initial_robot_poses[idx] = None + else: + self._initial_robot_poses[idx] = None + else: + # Twist doesn't need initial robot pose + self._initial_robot_poses[idx] = None + + self._is_calibrated = True + logger.info("Calibration complete. Now streaming delta poses...") + return True + + except Exception as e: + logger.error(f"Calibration failed: {e}", exc_info=True) + return False + + def _get_initial_robot_state(self, rpc_method: str) -> Pose | None: + """Get initial robot state via RPC call. + Args: + rpc_method: RPC method name that returns the initial state. + Returns: + Robot state from RPC, or origin Pose() if RPC fails. + """ + try: + state: Pose = self.get_rpc_calls(rpc_method)() + logger.info(f"Got initial robot state via {rpc_method}: {state}") + return state # type: ignore[no-any-return] + except Exception as e: + logger.warning(f"RPC call {rpc_method} failed: {e}, using origin pose") + return None + + @rpc + def reset_calibration(self) -> None: + """Reset calibration. Stops streaming until recalibrated.""" + self._is_calibrated = False + self._initial_controller_poses = {i: None for i in self._active_indices} + self._initial_robot_poses = {i: None for i in self._active_indices} + logger.info("Calibration reset.") + + @rpc + def is_calibrated(self) -> bool: + return self._is_calibrated + + @rpc + def get_status(self) -> dict[str, Any]: + """Get current teleoperation status.""" + status: dict[str, Any] = { + TeleopStatusKey.IS_CALIBRATED.value: self._is_calibrated, + "active_indices": self._active_indices, + } + for i in self._active_indices: + status[TeleopStatusKey.CONTROLLER_HAS_DATA.value.format(index=i)] = ( + self._current_controller_poses.get(i) is not None + ) + status[TeleopStatusKey.CONTROLLER_TRIGGER_VALUE.value.format(index=i)] = ( + self._all_trigger_values.get(i, 0.0) + ) + status[TeleopStatusKey.CONTROLLER_LABEL.value.format(index=i)] = self._get_label(i) + return status + + def compute_deltas( + self, + controller_poses: dict[int, Pose | None], + controller_trigger_values: dict[int, float], + ) -> dict[int, Pose | None]: + """Compute delta = current_pose - initial_pose for each controller.""" + self._tracking_msg_count += 1 + self._current_controller_poses = controller_poses + self._all_trigger_values = controller_trigger_values + + if not self._is_calibrated: + if self._tracking_msg_count <= 1 or self._tracking_msg_count % 100 == 0: + logger.warning("Not calibrated. Calibrate to start teleoperation.") + return {i: None for i in self._active_indices} + + self._publish_count += 1 + deltas: dict[int, Pose | None] = {} + + for i in self._active_indices: + current_pose = self._current_controller_poses.get(i) + if current_pose is None: + deltas[i] = None + continue + + initial_pose = self._initial_controller_poses.get(i) + if initial_pose is None: + deltas[i] = None + continue + + delta_pose = current_pose - initial_pose + if self.config.visualize_in_rerun: + label = self._get_label(i) + visualize_pose(current_pose, label) + visualize_trigger_value(self._all_trigger_values.get(i, 0.0), label) + + deltas[i] = delta_pose + + return deltas + + def publish_command(self, index: int, command: Any, aux_command: Any = None) -> None: + """Publish command to controller_delta_{index} and trigger_value_{index}.""" + if command is not None: + controller_output = getattr(self, f"controller_delta_{index}", None) + if controller_output and hasattr(controller_output, "publish"): + try: + controller_output.publish(command) + except Exception as e: + logger.error(f"Failed to publish command for index {index}: {e}") + + if aux_command is not None: + trigger_output = getattr(self, f"trigger_value_{index}", None) + if trigger_output and hasattr(trigger_output, "publish"): + try: + trigger_output.publish(aux_command) + except Exception as e: + logger.debug(f"Failed to publish aux command for index {index}: {e}") diff --git a/dimos/teleop/devices/vr_teleop_module.py b/dimos/teleop/devices/vr_teleop_module.py new file mode 100644 index 0000000000..e20acb47a0 --- /dev/null +++ b/dimos/teleop/devices/vr_teleop_module.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +VR Teleoperation Module for Quest 3 controllers. + +Receives VR controller tracking data via LCM from Deno bridge, +transforms from WebXR to robot frame, computes deltas, and publishes commands. +""" + +from dataclasses import dataclass, field +import threading +import time +from typing import TYPE_CHECKING, Any + +from dimos_lcm.geometry_msgs import Transform as LCMTransform +from reactivex.disposable import Disposable + +from dimos.core import In, rpc +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped +from dimos.msgs.std_msgs import Bool, Float32 +from dimos.teleop.devices.base_teleop_module import BaseTeleopConfig, BaseTeleopModule +from dimos.teleop.utils.teleop_transforms import ( + pose_to_pose_stamped, + pose_to_twist_stamped, + transform_vr_to_robot, +) +from dimos.utils.logging_config import setup_logger +from dimos.utils.transform_utils import matrix_to_pose + +if TYPE_CHECKING: + from dimos.msgs.geometry_msgs.Pose import Pose + +logger = setup_logger() + + +@dataclass +class VRTeleopConfig(BaseTeleopConfig): + """Configuration for VR Teleoperation Module.""" + + output_types: list[type] = field(default_factory=lambda: [PoseStamped, PoseStamped]) + input_labels: list[str] = field(default_factory=lambda: ["left_vr", "right_vr"]) + robot_pose_rpc_methods: list[str | None] = field(default_factory=lambda: [None, None]) + control_loop_hz: float = 50.0 + + +class VRTeleopModule(BaseTeleopModule[VRTeleopConfig]): + """VR Teleoperation Module for Quest 3 controllers. + + Subscribes to controller data from Deno bridge, transforms WebXR→robot frame, + computes deltas from calibration point, and publishes commands. + """ + + default_config = VRTeleopConfig + + # LCM inputs from Deno bridge + vr_left_transform: In[LCMTransform] + vr_right_transform: In[LCMTransform] + vr_trigger_0: In[Float32] + vr_trigger_1: In[Float32] + vr_teleop_enable: In[Bool] # X button calibration toggle + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + self._lcm_lock = threading.Lock() + self._lcm_controller_poses: dict[int, Pose | None] = {i: None for i in self._active_indices} + self._lcm_receive_times: dict[int, float] = {i: 0.0 for i in self._active_indices} + self._lcm_frame_ids: dict[int, str] = {i: "" for i in self._active_indices} + self._lcm_gripper_values: dict[int, float] = {i: 0.0 for i in self._active_indices} + self._control_loop_thread: threading.Thread | None = None + self._control_loop_running = False + + @rpc + def start(self) -> None: + """Start the VR teleoperation module.""" + super().start() + + left_index = self._active_indices[0] if len(self._active_indices) > 0 else 0 + right_index = self._active_indices[1] if len(self._active_indices) > 1 else 1 + + logger.info( + f"VR controller mapping: left→{self._get_label(left_index)}(idx {left_index}), " + f"right→{self._get_label(right_index)}(idx {right_index})" + ) + + # Subscribe to LCM inputs + subscriptions = [ + (self.vr_left_transform, lambda msg, idx=left_index: self._on_lcm_transform(idx, msg)), + ( + self.vr_right_transform, + lambda msg, idx=right_index: self._on_lcm_transform(idx, msg), + ), + (self.vr_trigger_0, lambda msg, idx=left_index: self._on_lcm_trigger(idx, msg)), + (self.vr_trigger_1, lambda msg, idx=right_index: self._on_lcm_trigger(idx, msg)), + (self.vr_teleop_enable, self._on_lcm_teleop_enable), + ] + for stream, handler in subscriptions: + if stream and stream.transport: + self._disposables.add(Disposable(stream.subscribe(handler))) # type: ignore[misc, arg-type] + + logger.info("VR Teleoperation Module started") + + @rpc + def stop(self) -> None: + """Stop the VR teleoperation module.""" + logger.info("Stopping VR Teleoperation Module...") + self._stop_control_loop() + super().stop() + + @rpc + def start_teleop(self) -> None: + """Calibrate and start teleoperation (called via X button).""" + logger.info("Starting teleop - calibrating VR...") + self.calibrate() + + @rpc + def stop_teleop(self) -> None: + """Stop teleoperation and reset calibration.""" + logger.info("Stopping teleop - resetting calibration...") + self._stop_control_loop() + self.reset_calibration() + + def _on_lcm_teleop_enable(self, msg: Bool) -> None: + """Handle teleop enable/disable from X button.""" + logger.info(f"Received teleop_enable: {msg.data}") + if bool(msg.data): + self.start_teleop() + else: + self.stop_teleop() + + def _on_lcm_transform(self, index: int, transform: LCMTransform) -> None: + """Handle controller transform, converting WebXR to robot frame.""" + receive_time = time.time() + self._start_control_loop() + is_left = index == self._active_indices[0] + frame_id = "left_controller" if is_left else "right_controller" + transform_matrix = transform_vr_to_robot(transform, is_left_controller=is_left) + pose = matrix_to_pose(transform_matrix) + with self._lcm_lock: + self._lcm_controller_poses[index] = pose + self._lcm_receive_times[index] = receive_time + self._lcm_frame_ids[index] = frame_id + + def _on_lcm_trigger(self, index: int, msg: Float32) -> None: + """Handle trigger value for gripper control.""" + self._start_control_loop() + with self._lcm_lock: + self._lcm_gripper_values[index] = float(msg.data) + + def _start_control_loop(self) -> None: + """Start the control loop thread if not running.""" + if self._control_loop_running: + return + + self._control_loop_running = True + self._control_loop_thread = threading.Thread( + target=self._control_loop, + daemon=True, + name="VRTeleopControlLoop", + ) + self._control_loop_thread.start() + logger.info(f"Control loop started at {self.config.control_loop_hz} Hz") + + def _stop_control_loop(self) -> None: + self._control_loop_running = False + if self._control_loop_thread is not None: + self._control_loop_thread.join(timeout=1.0) + self._control_loop_thread = None + logger.info("Control loop stopped") + + def _control_loop(self) -> None: + """Main control loop: compute deltas and publish commands at fixed rate.""" + period = 1.0 / self.config.control_loop_hz + + while self._control_loop_running: + loop_start = time.perf_counter() + + with self._lcm_lock: + controller_poses = dict(self._lcm_controller_poses) + controller_trigger_values = dict(self._lcm_gripper_values) + receive_times = dict(self._lcm_receive_times) + frame_ids = dict(self._lcm_frame_ids) + + deltas = self.compute_deltas(controller_poses, controller_trigger_values) + + for idx, i in enumerate(self._active_indices): + delta = deltas.get(i) + if delta is None: + continue + + trigger_value = self._all_trigger_values.get(i, 0.0) + trigger_bool = Bool(data=trigger_value > self.config.gripper_threshold) + ts = receive_times.get(i, time.time()) + frame_id = frame_ids.get(i, "") + + output_type = self.config.output_types[idx] + if output_type == PoseStamped: + command = pose_to_pose_stamped( + delta, + initial_robot_pose=self._initial_robot_poses.get(i), + ts=ts, + frame_id=frame_id, + ) + elif output_type == TwistStamped: + command = pose_to_twist_stamped( + delta, + linear_scale=self.config.linear_scale, + angular_scale=self.config.angular_scale, + max_linear=self.config.max_linear_velocity, + max_angular=self.config.max_angular_velocity, + ts=ts, + frame_id=frame_id, + ) + else: + continue + + self.publish_command(i, command, trigger_bool) + + elapsed = time.perf_counter() - loop_start + sleep_time = period - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + + +vr_teleop_module = VRTeleopModule.blueprint diff --git a/dimos/teleop/teleop_blueprints.py b/dimos/teleop/teleop_blueprints.py new file mode 100644 index 0000000000..22f908c4d5 --- /dev/null +++ b/dimos/teleop/teleop_blueprints.py @@ -0,0 +1,34 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Pre-built blueprints for VR teleoperation.""" + +from dimos_lcm.geometry_msgs import Transform as LCMTransform + +from dimos.core.blueprints import autoconnect +from dimos.core.transport import LCMTransport +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped +from dimos.msgs.std_msgs import Bool, Float32 +from dimos.teleop.devices.vr_teleop_module import vr_teleop_module + +quest3_teleop = autoconnect( + vr_teleop_module( + output_types=[PoseStamped, TwistStamped], + input_labels=["left_vr", "right_vr"], + visualize_in_rerun=True, + ), +) + + +__all__ = ["quest3_teleop"] diff --git a/dimos/teleop/utils/teleop_transforms.py b/dimos/teleop/utils/teleop_transforms.py new file mode 100644 index 0000000000..4ffdfb79a0 --- /dev/null +++ b/dimos/teleop/utils/teleop_transforms.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Teleop transform utilities for VR coordinate transforms.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np +from scipy.spatial.transform import Rotation as R # type: ignore[import-untyped] + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.TwistStamped import TwistStamped +from dimos.msgs.geometry_msgs.Vector3 import Vector3 + +if TYPE_CHECKING: + from dimos_lcm.geometry_msgs import Transform as LCMTransform + from numpy.typing import NDArray + + from dimos.msgs.geometry_msgs.Pose import Pose + +# Coordinate frame transformation from VR (WebXR) to robot frame +# WebXR: X=right, Y=up, Z=back (towards user) +# Robot: X=forward, Y=left, Z=up +VR_TO_ROBOT_FRAME: NDArray[np.float64] = np.array( + [ + [0, 0, -1, 0], # Robot X = -VR Z (forward) + [-1, 0, 0, 0], # Robot Y = -VR X (left) + [0, 1, 0, 0], # Robot Z = +VR Y (up) + [0, 0, 0, 1], + ], + dtype=np.float64, +) + + +def transform_vr_to_robot( + transform: LCMTransform, + is_left_controller: bool = True, +) -> NDArray[np.float64]: + """Transform VR controller pose to robot coordinate frame. + + Args: + transform: LCM Transform from VR controller. + is_left_controller: True for left controller (+90° Z rotation), + False for right controller (-90° Z rotation). + + Returns: + 4x4 transformation matrix in robot frame. + """ + # Convert LCM Transform to 4x4 matrix + t = transform.translation + q = transform.rotation + rot_matrix = R.from_quat([q.x, q.y, q.z, q.w]).as_matrix() + + vr_matrix = np.eye(4, dtype=np.float64) + vr_matrix[:3, :3] = rot_matrix + vr_matrix[:3, 3] = [t.x, t.y, t.z] + + # Apply controller alignment rotation + # Left controller rotates +90° around Z, right rotates -90° + direction = 1 if is_left_controller else -1 + z_rotation = R.from_euler("z", 90 * direction, degrees=True).as_matrix() + vr_matrix[:3, :3] = vr_matrix[:3, :3] @ z_rotation + + # Apply VR to robot frame transformation + return VR_TO_ROBOT_FRAME @ vr_matrix + + +def pose_to_pose_stamped( + delta_pose: Pose, + initial_robot_pose: Pose | None = None, + frame_id: str = "None", + ts: float | None = None, +) -> PoseStamped: + """Convert delta pose to PoseStamped (target = initial + delta).""" + if initial_robot_pose is not None: + target_pose = initial_robot_pose + delta_pose + else: + target_pose = delta_pose + + return PoseStamped( + ts=ts if ts is not None else 0.0, + frame_id=frame_id, + position=target_pose.position, + orientation=target_pose.orientation, + ) + + +def pose_to_twist_stamped( + delta_pose: Pose, + linear_scale: float = 1.0, + angular_scale: float = 1.0, + max_linear: float = 0.5, + max_angular: float = 1.0, + frame_id: str = "", + ts: float | None = None, +) -> TwistStamped: + """Convert delta pose to TwistStamped velocity command.""" + + def clamp(value: float, limit: float) -> float: + return max(-limit, min(limit, value)) + + linear = Vector3( + clamp(delta_pose.position.x * linear_scale, max_linear), + clamp(delta_pose.position.y * linear_scale, max_linear), + clamp(delta_pose.position.z * linear_scale, max_linear), + ) + + euler = delta_pose.orientation.to_euler() + angular = Vector3( + clamp(euler.x * angular_scale, max_angular), + clamp(euler.y * angular_scale, max_angular), + clamp(euler.z * angular_scale, max_angular), + ) + + return TwistStamped( + ts=ts if ts is not None else 0.0, + frame_id=frame_id, + linear=linear, + angular=angular, + ) diff --git a/dimos/teleop/utils/teleop_visualization.py b/dimos/teleop/utils/teleop_visualization.py new file mode 100644 index 0000000000..d95e2416f9 --- /dev/null +++ b/dimos/teleop/utils/teleop_visualization.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Teleop visualization utilities for Rerun.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + from dimos.msgs.geometry_msgs import Pose + +logger = setup_logger() + +try: + import rerun as rr + + from dimos.dashboard.rerun_init import connect_rerun + + RERUN_AVAILABLE = True +except ImportError: + RERUN_AVAILABLE = False + + +def init_rerun_visualization() -> bool: + """Initialize Rerun visualization connection.""" + if not RERUN_AVAILABLE: + return False + try: + connect_rerun() + logger.info("Connected to Rerun for teleop visualization") + return True + except Exception as e: + logger.warning(f"Failed to connect to Rerun: {e}") + return False + + +def visualize_pose(pose: Pose, controller_label: str) -> None: + """Visualize controller absolute pose in Rerun. + + Args: + pose: The controller's current pose. + controller_label: Label for the controller (e.g., "left_vr"). + """ + if not RERUN_AVAILABLE: + return + try: + from dimos.teleop.utils.teleop_transforms import pose_to_pose_stamped + + pose_stamped = pose_to_pose_stamped( + pose, frame_id=f"world/teleop/{controller_label}_controller" + ) + rr.log( + f"world/teleop/{controller_label}_controller", + pose_stamped.to_rerun(), # type: ignore[no-untyped-call] + ) + # Log 3D axes to visualize controller orientation (X=red, Y=green, Z=blue) + rr.log( + f"world/teleop/{controller_label}_controller/axes", + rr.TransformAxes3D(0.10), # type: ignore[attr-defined] + ) + except Exception as e: + logger.debug(f"Failed to log {controller_label} controller to Rerun: {e}") + + +def visualize_trigger_value(trigger_value: float, controller_label: str) -> None: + """Visualize trigger value in Rerun as a scalar time series. + + Args: + trigger_value: Trigger value (0.0-1.0). + controller_label: Label for the controller (e.g., "left_vr"). + """ + if not RERUN_AVAILABLE: + return + try: + rr.log( + f"world/teleop/{controller_label}_controller/trigger", + rr.Scalars(trigger_value), # type: ignore[attr-defined] + ) + except Exception as e: + logger.debug(f"Failed to log {controller_label} trigger to Rerun: {e}") diff --git a/dimos/teleop/web/README.md b/dimos/teleop/web/README.md new file mode 100644 index 0000000000..defe125f8f --- /dev/null +++ b/dimos/teleop/web/README.md @@ -0,0 +1,53 @@ +# Teleop Web + +WebXR client and server for Quest 3 VR teleoperation. + +## Components + +### teleop_server.ts + +Deno server that bridges WebSocket and LCM: +- Serves WebXR client over HTTPS (required for Quest) +- Forwards LCM packets to browser +- Forwards browser packets to LCM + +### static/index.html + +WebXR client running on Quest 3: +- Captures controller poses at set frequency (Also Depends on Refresh rate) +- Sends Transform messages via WebSocket +- X button triggers calibration (teleop_enable) + +## Running + +```bash +deno run --allow-net --allow-read --allow-run --allow-write --unstable-net dimos/teleop/web/teleop_server.ts +``` + +Server starts at `https://localhost:8443` + +SSL certificates are generated automatically on first run in `assets/teleop_certs/`. + +## Message Flow + +``` +Quest Browser Deno Server Python + │ │ │ + │── Transform (left pose) ─────→ │── /vr_left_transform ─────→ │ + │── Transform (right pose) ────→ │── /vr_right_transform ────→ │ + │── Float32 (left trigger) ────→ │── /vr_trigger_0 ──────────→ │ + │── Float32 (right trigger) ───→ │── /vr_trigger_1 ──────────→ │ + │── Bool (X button) ───────────→ │── /vr_teleop_enable ──────→ │ + │ │ │ + │←─────────────────── LCM packets (subscribePacket) ──────────│ +``` + +## LCM Topics + +| Topic | Type | Description | +|-------|------|-------------| +| `/vr_left_transform` | Transform | Left controller pose (WebXR frame) | +| `/vr_right_transform` | Transform | Right controller pose (WebXR frame) | +| `/vr_trigger_0` | Float32 | Left trigger value (0.0-1.0) | +| `/vr_trigger_1` | Float32 | Right trigger value (0.0-1.0) | +| `/vr_teleop_enable` | Bool | Calibration toggle (X button) | diff --git a/dimos/teleop/web/static/index.html b/dimos/teleop/web/static/index.html new file mode 100644 index 0000000000..b6d325a493 --- /dev/null +++ b/dimos/teleop/web/static/index.html @@ -0,0 +1,412 @@ + + + + + + Quest 3 VR Teleop + + + +
+

Quest 3 VR Teleop

+
Ready to connect
+ + +
+ + + + + diff --git a/dimos/teleop/web/teleop_server.ts b/dimos/teleop/web/teleop_server.ts new file mode 100644 index 0000000000..69332eab6f --- /dev/null +++ b/dimos/teleop/web/teleop_server.ts @@ -0,0 +1,97 @@ +#!/usr/bin/env -S deno run --allow-net --allow-read --allow-run --allow-write --unstable-net + +// LCM to WebSocket Bridge for Robot Control +// Forwards robot pose to browser, receives twist commands from browser + +import { LCM } from "jsr:@dimos/lcm"; +import { dirname, fromFileUrl, join } from "jsr:@std/path"; + +const PORT = 8443; +const clients = new Set(); + +// Resolve paths relative to script location +const scriptDir = dirname(fromFileUrl(import.meta.url)); +const certsDir = join(scriptDir, "../../../assets/teleop_certs"); +const certPath = join(certsDir, "cert.pem"); +const keyPath = join(certsDir, "key.pem"); + +// Auto-generate self-signed certificates if they don't exist +async function ensureCerts(): Promise<{ cert: string; key: string }> { + try { + const cert = await Deno.readTextFile(certPath); + const key = await Deno.readTextFile(keyPath); + return { cert, key }; + } catch { + console.log("Generating self-signed certificates..."); + await Deno.mkdir(certsDir, { recursive: true }); + const cmd = new Deno.Command("openssl", { + args: [ + "req", "-x509", "-newkey", "rsa:2048", + "-keyout", keyPath, "-out", certPath, + "-days", "365", "-nodes", "-subj", "/CN=localhost" + ], + }); + const { code } = await cmd.output(); + if (code !== 0) { + throw new Error("Failed to generate certificates. Is openssl installed?"); + } + console.log("Certificates generated in assets/teleop_certs/"); + return { + cert: await Deno.readTextFile(certPath), + key: await Deno.readTextFile(keyPath), + }; + } +} + +const { cert, key } = await ensureCerts(); + +Deno.serve({ port: PORT, cert, key }, async (req) => { + const url = new URL(req.url); + + if (req.headers.get("upgrade") === "websocket") { + const { socket, response } = Deno.upgradeWebSocket(req); + socket.onopen = () => { console.log("Client connected"); clients.add(socket); }; + socket.onclose = () => { console.log("Client disconnected"); clients.delete(socket); }; + socket.onerror = () => clients.delete(socket); + + // Forward binary LCM packets from browser directly to UDP + socket.binaryType = "arraybuffer"; + let msgCount = 0; + socket.onmessage = async (event) => { + msgCount++; + if (event.data instanceof ArrayBuffer) { + const packet = new Uint8Array(event.data); + try { + await lcm.publishPacket(packet); + } catch (e) { + console.error("Forward error:", e); + } + } + }; + + return response; + } + + if (url.pathname === "/" || url.pathname === "/index.html") { + const html = await Deno.readTextFile(new URL("./static/index.html", import.meta.url)); + return new Response(html, { headers: { "content-type": "text/html" } }); + } + + return new Response("Not found", { status: 404 }); +}); + +console.log(`Server: https://localhost:${PORT}`); + +const lcm = new LCM(); +await lcm.start(); + +// Forward all raw packets to browser (we are decoding LCM directly in the browser) +lcm.subscribePacket((packet) => { + for (const client of clients) { + if (client.readyState === WebSocket.OPEN) { + client.send(packet); + } + } +}); + +await lcm.run();