Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions src/stretch/agent/robot_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,14 @@ def _start_threads(self):
self._update_map_thread = None
self._get_observations_thread = None

def get_parameters(self) -> Parameters:
"""Return the parameters for this model"""
return self.parameters

def get_semantic_sensor(self) -> OvmmPerception:
"""Return the semantic sensor in use by this model"""
return self.semantic_sensor

def get_robot(self) -> AbstractRobotClient:
"""Return the robot in use by this model"""
return self.robot
Expand Down Expand Up @@ -578,7 +586,7 @@ def rotate_in_place(

def get_command(self):
"""Get a command from config file or from user input if not specified."""
task = self.parameters.get("task").get("command")
task = self.parameters.get("task/command")
if task is not None and len(task) > 0:
return task
else:
Expand Down Expand Up @@ -2332,12 +2340,12 @@ def get_object_centric_observations(
scene_graph = self.extract_symbolic_spatial_info(instances)

world_representation = self.get_object_centric_world_representation(
instances,
self.parameters.get("vlm/vlm_context_length", 20),
self.parameters.get("vlm/sample_strategy", "clip"),
task,
self.encode_text(task),
scene_graph,
instance_memory=instances,
task=task,
text_features=self.encode_text(task),
max_context_length=self.parameters.get("vlm/vlm_context_length", 20),
sample_strategy=self.parameters.get("vlm/sample_strategy", "clip"),
scene_graph=scene_graph,
)
return world_representation

Expand Down
51 changes: 31 additions & 20 deletions src/stretch/agent/vlm_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,35 +94,46 @@ def plan(
plan_with_scene_graph=plan_with_scene_graph,
)
output = self.get_output_from_gpt(world_representation, task=query)
if show_plan:

print()
print("Plan from VLMs:")
print(output)
print()
if output == "explore":
print(">>>>>> Planner cannot find a plan, the robot should explore more >>>>>>>>>")
actions = None
elif output == "gpt API error":
print(">>>>>> there is something wrong with the planner api >>>>>>>>>")
actions = None
else:
actions = output.split("; ")

if show_plan and actions is not None:
import re

import matplotlib.pyplot as plt

if output == "explore":
print(">>>>>> Planner cannot find a plan, the robot should explore more >>>>>>>>>")
elif output == "gpt API error":
print(">>>>>> there is something wrong with the planner api >>>>>>>>>")
else:
actions = output.split("; ")
plt.clf()
for action_id, action in enumerate(actions):
crop_id = int(re.search(r"img_(\d+)", action).group(1))
global_id = world_representation.object_images[crop_id].instance_id
plt.subplot(1, len(actions), action_id + 1)
plt.imshow(
self.voxel_map.get_instances()[global_id].get_best_view().get_image()
)
plt.title(action.split("(")[0] + f" instance {global_id}")
plt.axis("off")
plt.suptitle(f"Task: {query}")
plt.show()
plt.savefig("plan.png")
plt.clf()
for action_id, action in enumerate(actions):
res = re.search(r"img_(\d+)", action)
if res is None:
print(f"{action_id}: Action {action} does not have a crop id")
continue
crop_id = int(res.group(1))
global_id = world_representation.object_images[crop_id].instance_id
plt.subplot(1, len(actions), action_id + 1)
plt.imshow(self.voxel_map.get_instances()[global_id].get_best_view().get_image())
plt.title(action.split("(")[0] + f" instance {global_id}")
plt.axis("off")
plt.suptitle(f"Task: {query}")
plt.show()
plt.savefig("plan.png")

if self.parameters.get("save_vlm_plan", True):
with open(plan_file, "w") as f:
f.write(output)
print(f"Task plan generated from VLMs has been written to {plan_file}")

return actions, world_representation

def get_output_from_gpt(self, world_rep, task: str):
Expand Down
208 changes: 108 additions & 100 deletions src/stretch/app/vlm_planning.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from stretch.agent import RobotAgent
from stretch.agent.vlm_planner import VLMPlanner
from stretch.agent.zmq_client import HomeRobotZMQClient
from stretch.agent.zmq_client import HomeRobotZmqClient
from stretch.core import get_parameters
from stretch.core.interfaces import Observations
from stretch.perception import create_semantic_sensor
Expand Down Expand Up @@ -107,7 +107,6 @@ def images_to_video(image_list, output_path, fps=30):
default="",
help="Input path. If empty, run on the real robot.",
)
@click.option("--local", is_flag=True, help="Run code locally on the robot.")
@click.option("--robot_ip", default="")
@click.option("--task", "-t", type=str, default="", help="Task to run with the planner.")
@click.option(
Expand Down Expand Up @@ -145,12 +144,14 @@ def main(
show_instances: bool = False,
api_key: str = None,
task: str = "",
local: bool = False,
robot_ip: str = "",
):
"""Simple script to load a voxel map"""
input_path = Path(input_path)
print("Loading:", input_path)
if len(input_path) > 0:
input_path = Path(input_path)
print("Loading:", input_path)
else:
input_path = None

loaded_voxel_map = None

Expand All @@ -172,10 +173,11 @@ def main(
print("Creating semantic sensors...")
semantic_sensor = create_semantic_sensor(parameters=vlm_parameters)

if len(input_path) > 0:
if input_path is not None:
# We will load data from a pickle file
robot = DummyStretchClient()
else:
robot = HomeRobotZMQClient(robot_ip=robot_ip, local=local)
robot = HomeRobotZmqClient(robot_ip=robot_ip)

print("Creating robot agent...")
agent = RobotAgent(
Expand All @@ -186,11 +188,12 @@ def main(
)
voxel_map = agent.get_voxel_map()

if len(input_path) > 0:
if input_path is not None:
# load from pickle
voxel_map.read_from_pickle(input_path, num_frames=frame, perception=semantic_sensor)
else:
# Scan the local area to get a map
agent.start()
agent.rotate_in_place()

# get the task
Expand All @@ -207,6 +210,9 @@ def main(
# )
run_vlm_planner(agent, task, show_svm, test_vlm, api_key, show_instances)

# Finish up
robot.stop()


def run_vlm_planner(
agent,
Expand Down Expand Up @@ -249,110 +255,112 @@ def run_vlm_planner(
footprint = robot.get_footprint()
print(f"{x0} valid = {space.is_valid(x0)}")
voxel_map.show(instances=show_instances, orig=start_xyz, xyt=x0, footprint=footprint)
return

if test_vlm:
start_is_valid = space.is_valid(x0, verbose=True, debug=False)
if not start_is_valid:
print("you need to manually set the start pose to be valid")
return
start_is_valid = space.is_valid(x0, verbose=True, debug=False)
if not start_is_valid:
print("you need to manually set the start pose to be valid")
return

print("\nFirst plan with the original map: ")
original_plan, world_rep = vlm_planner.plan(
current_pose=x0,
show_plan=True,
query=task,
plan_with_reachable_instances=False,
plan_with_scene_graph=False,
)
print("\nFirst plan with the original map: ")
original_plan, world_rep = vlm_planner.plan(
current_pose=x0,
show_plan=True,
query=task,
plan_with_reachable_instances=False,
plan_with_scene_graph=False,
)

# loop over the plan and check feasibilities for each action
preconditions = {}
while len(original_plan) > 0:
current_action = original_plan.pop(0)
# loop over the plan and check feasibilities for each action
preconditions = {}
while len(original_plan) > 0:
current_action = original_plan.pop(0)

# navigation action only for now
if "goto" not in current_action:
continue
# navigation action only for now
if "goto" not in current_action:
continue

# get the target object instance from the action
crop_id = int(re.search(r"img_(\d+)", current_action).group(1))
global_id = world_rep.object_images[crop_id].instance_id
current_instance = voxel_map.get_instances()[global_id]
# get the target object instance from the action
crop_id = int(re.search(r"img_(\d+)", current_action).group(1))
global_id = world_rep.object_images[crop_id].instance_id
current_instance = voxel_map.get_instances()[global_id]

print(f"Checking feasibility of action: {current_action}")
motion_plan = agent.plan_to_instance_for_manipulation(
current_instance, start=np.array(start_xyz)
)
feasible = motion_plan.get_success()
print(f"Checking feasibility of action: {current_action}")
motion_plan = agent.plan_to_instance_for_manipulation(
current_instance, start=np.array(start_xyz)
)
feasible = motion_plan.get_success()

if feasible:
if feasible:
continue

print(f"Action {current_action} is not feasible.")
print("Searching over the map and replanning...")
# loop over all instances in the map and try to find a feasible action
# TODO: This is highly inefficient and should be replaced with scene graph
planning_map = copy.deepcopy(voxel_map)
planning_xyz = start_xyz
for removed_instance in voxel_map.get_instances():

# skip the current instance
if removed_instance == current_instance:
continue

print(f"Action {current_action} is not feasible.")
print("Searching over the map and replanning...")
# loop over all instances in the map and try to find a feasible action
# TODO: This is highly inefficient and should be replaced with scene graph
planning_map = copy.deepcopy(voxel_map)
planning_xyz = start_xyz
for removed_instance in voxel_map.get_instances():

# skip the current instance
if removed_instance == current_instance:
continue

# manually remove an instance for testing planning.
new_map = copy.deepcopy(planning_map)
new_map.delete_instance(
removed_instance, force_update=True, min_bound_z=0.05, assume_explored=True
)
# manually remove an instance for testing planning.
new_map = copy.deepcopy(planning_map)
new_map.delete_instance(
removed_instance, force_update=True, min_bound_z=0.05, assume_explored=True
)

# # visulize the deleted_instance and the new map
# pcd = o3d.geometry.PointCloud()
# pcd.points = o3d.utility.Vector3dVector(removed_instance.point_cloud.numpy())
# o3d.visualization.draw_geometries([pcd])
# new_map.show()

# create a new agent for planning with the updated map
planning_agent = RobotAgent(
robot,
vlm_parameters,
voxel_map=new_map,
semantic_sensor=semantic_sensor,
)
# # visulize the deleted_instance and the new map
# pcd = o3d.geometry.PointCloud()
# pcd.points = o3d.utility.Vector3dVector(removed_instance.point_cloud.numpy())
# o3d.visualization.draw_geometries([pcd])
# new_map.show()

# create a new agent for planning with the updated map
planning_agent = RobotAgent(
robot,
vlm_parameters,
voxel_map=new_map,
semantic_sensor=semantic_sensor,
)

# motion planning with the new map
motion_plan = planning_agent.plan_to_instance_for_manipulation(
removed_instance, start=np.array(planning_xyz)
)
feasible = motion_plan.get_success()
if feasible:
print(
f"Found a feasible motion plan for action: {current_action} by removing instance {removed_instance.global_id}"
)
planning_xyz = motion_plan.get_trajectory()[-1].state
planning_map = new_map

# find img_id of the removed instance
for obj_im in world_rep.object_images:
if obj_im.instance_id == removed_instance.global_id:
removed_crop_id = obj_im.crop_id
break

# append preconditions
preconditions[current_action] = removed_crop_id
break

print("\nPlan with the task with preconditions: ")
print(preconditions)
for action, crop_id in preconditions.items():
task += f" Before {action}, relocate img_{crop_id} to another instance."
vlm_planner.plan(
current_pose=x0,
show_plan=True,
query=task,
plan_with_reachable_instances=False,
plan_with_scene_graph=False,
# motion planning with the new map
motion_plan = planning_agent.plan_to_instance_for_manipulation(
removed_instance, start=np.array(planning_xyz)
)
feasible = motion_plan.get_success()
if feasible:
print(
f"Found a feasible motion plan for action: {current_action} by removing instance {removed_instance.global_id}"
)
planning_xyz = motion_plan.get_trajectory()[-1].state
planning_map = new_map

# find img_id of the removed instance
for obj_im in world_rep.object_images:
if obj_im.instance_id == removed_instance.global_id:
removed_crop_id = obj_im.crop_id
break

# append preconditions
preconditions[current_action] = removed_crop_id
break

print("\nPlan with the task with preconditions: ")
print(preconditions)
for action, crop_id in preconditions.items():
task += f" Before {action}, relocate img_{crop_id} to another instance."

# Run the planner
vlm_planner.plan(
current_pose=x0,
show_plan=True,
query=task,
plan_with_reachable_instances=False,
plan_with_scene_graph=False,
)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ vlm:
replanning: False
sample_strategy: "clip"

task:
command: "Pick up a basket."
# task:
# command: "Pick up a basket."
Loading