From 2d5b7e4b78e9595d867a1329cae7a4133d09c8b4 Mon Sep 17 00:00:00 2001 From: Shyam Sudhakaran Date: Wed, 5 Oct 2022 00:25:31 -0700 Subject: [PATCH 1/3] sb3_take_cover --- examples/rl/sb3_take_cover.py | 269 ++++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 examples/rl/sb3_take_cover.py diff --git a/examples/rl/sb3_take_cover.py b/examples/rl/sb3_take_cover.py new file mode 100644 index 00000000..727f13ff --- /dev/null +++ b/examples/rl/sb3_take_cover.py @@ -0,0 +1,269 @@ +# Copyright 2022 The HuggingFace Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 + +from turtle import left, right +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union +import argparse +import random +import numpy as np +import matplotlib.pyplot as plt +import itertools + +from simulate import logging + + +logger = logging.get_logger(__name__) + +try: + from stable_baselines3 import PPO +except ImportError: + logger.warning( + "stable-baseline3 is required for this example and is not installed. To install: pip install simulate[sb3]" + ) + exit() + +import simulate as sm + + +CAMERA_HEIGHT = 40 +CAMERA_WIDTH = 64 + + +def rgb2gray(rgb): + return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140]) + +class TakeCoverEnv(sm.RLEnv): + def __init__( + self, + scene_or_map_fn: Union[Callable, sm.Scene], + n_maps: Optional[int] = 1, + n_show: Optional[int] = 1, + time_step: Optional[float] = 1 / 30.0, + frame_skip: Optional[int] = 4, + **engine_kwargs, + ): + super().__init__( + scene_or_map_fn=scene_or_map_fn, + n_maps=n_maps, + n_show=n_show, + time_step=time_step, + frame_skip=frame_skip, + **engine_kwargs + ) + self.action_tags = ["actor_action", "projectile_action_0", "projectile_action_1", "projectile_action_2"] + + self.projectile_nodes = ["projectile_0_0", "projectile_1_0", "projectile_2_0"] + self.projectile_actions = ["projectile_action_0", "projectile_action_1", "projectile_action_2"] + + # self.projectile_action_acceleration_indices = 1 + + self.projectile_position_control_indices = np.arange(2,9) + + def check_projectile_wall_collision(self, event): + needs_reset = False + nodes = event['nodes'] + action_dict = {} + random_positions = list(np.random.choice(self.projectile_position_control_indices, 3, replace=False)) + for i, projectile in enumerate(self.projectile_nodes): + if nodes[projectile]['position'][-1] <= -4.8: + needs_reset = True + action_dict[self.projectile_actions[i]] = [ + [ + [int(random_positions[i])] + ] + ] + + return needs_reset, action_dict + + def set_random_positions(self): + action_dict = {} + random_positions = list(np.random.choice(self.projectile_position_control_indices, 3, replace=False)) + for i, projectile in enumerate(self.projectile_nodes): + action_dict[self.projectile_actions[i]] = [ + [ + [int(random_positions[i])] + ] + ] + self.step_send_async( + action_dict + ) + self.scene.engine.step_recv_async() + + + def reset(self) -> Dict: + """ + Resets the actors and the scene of the environment. + + Returns: + obs (`Dict`): the observation of the environment after reset. + """ + self.scene.reset() + + # To extract observations, we do a "fake" step (no actual simulation with frame_skip=0) + event = self.scene.step(return_frames=True, frame_skip=0) + obs = self._extract_sensor_obs(event["actor_sensor_buffers"]) + obs = self._squeeze_actor_dimension(obs) + obs["actor_0_camera"] = obs["actor_0_camera"][0:1] + return obs + + + def step(self, action: Union[Dict, List, np.ndarray]) -> Tuple[Dict, np.ndarray, np.ndarray, List[Dict]]: + action_dict = { + "actor_action":[ + [ + [int(action[0])], + ], + ], + } + + for projectile in self.projectile_actions: + action_dict[projectile] = [ + [ + [1], + ], + ] + + self.step_send_async(action=action_dict) + event = self.scene.engine.step_recv_async() + + obs = self._extract_sensor_obs(event["actor_sensor_buffers"]) + reward = self._convert_to_numpy(event["actor_reward_buffer"]).flatten()[0:1] + done = self._convert_to_numpy(event["actor_done_buffer"]).flatten()[0:1] + obs = self._squeeze_actor_dimension(obs) + obs["actor_0_camera"] = obs["actor_0_camera"][0:1] + # obs = self._squeeze_actor_dimension(obs)['actor_0_camera'] + # print("REWARD", reward) + # print("DONE", done) + # obs = np.flip(np.array(obs, dtype=np.uint8).transpose(1, 2, 0), axis=0).astype(np.uint8) + # obs = rgb2gray(obs) + + needs_reset, projectile_reset_action_dict = self.check_projectile_wall_collision(event) + + if needs_reset: + self.step_send_async( + projectile_reset_action_dict + ) + self.scene.engine.step_recv_async() + + return obs, reward, done, [{}] + + +def create_target_projectiles(index: int, num_projectiles: int = 3): + projectiles = [] + for i in range(num_projectiles): + target_position = [0.5*i + 0.1, 0.2, 4.0] + projectile = sm.Box( + name=f"projectile_{i}_{index}", + position=target_position, + bounds = (-0.1, 0.1, 0.1, 0.3, -0.1, 0.1), + material=sm.Material.RED, + is_actor=True, + physics_component=sm.RigidBodyComponent(mass=0), + with_collider=True, + ) + projectile.physics_component.constraints = ["freeze_rotation_x", "freeze_rotation_z", "freeze_rotation_y"] + mapping = [ + sm.ActionMapping("do_nothing"), + + # acceleration + sm.ActionMapping("change_position", axis=[0, 0, -1], amplitude=0.15), + # sm.ActionMapping("change_position", axis=[0, 0, -1], amplitude=0.3), + # sm.ActionMapping("change_position", axis=[0, 0, -1], amplitude=0.1), + + # set positions + sm.ActionMapping("set_position", position=[-3.0, 0.2, 4.0], use_local_coordinates=False), + sm.ActionMapping("set_position", position=[-2.0, 0.2, 4.0], use_local_coordinates=False), + sm.ActionMapping("set_position", position=[-1.0, 0.2, 4.0], use_local_coordinates=False), + sm.ActionMapping("set_position", position=[0.0, 0.2, 4.0], use_local_coordinates=False), + sm.ActionMapping("set_position", position=[1.0, 0.2, 4.0], use_local_coordinates=False), + sm.ActionMapping("set_position", position=[2.0, 0.2, 4.0], use_local_coordinates=False), + sm.ActionMapping("set_position", position=[2.5, 0.2, 4.0], use_local_coordinates=False), + ] + projectile.actuator = sm.Actuator(n=9, actuator_tag=f"projectile_action_{i}", mapping=mapping) + projectiles.append(projectile) + return projectiles + + +def generate_map(index): + root = sm.Asset(name=f"root_{index}") + + floor = sm.Box(name=f"floor_{index}", position=[0, 0, 0], bounds=[-5, 5, 0, 0.1, -5, 5], material=sm.Material.BLUE) + right_wall = sm.Box(name=f"wall1_{index}", position=[-3.1, 0, 0], bounds=[0, 0.1, 0, 1, -5, 5], material=sm.Material.WHITE) + left_wall = sm.Box(name=f"wall2_{index}", position=[3.1, 0, 0], bounds=[0, 0.1, 0, 1, -5, 5], material=sm.Material.WHITE) + close_wall = sm.Box(name=f"wall4_{index}", position=[0, 0, -5], bounds=[-5, 5, 0, 1, 0, 0.1], material=sm.Material.WHITE) + + root += floor + root += right_wall + root += left_wall + root += close_wall + + + actor = sm.EgocentricCameraActor( + name=f"actor_{index}", + position=[0.0, 0.1, -4.0], + material=sm.Material.GREEN, + ) + + actor.physics_component.mass = 0.0 + actor.physics_component.constraints = ["freeze_rotation_x", "freeze_rotation_z", "freeze_position_y", "freeze_position_z"] + + mapping = [ + sm.ActionMapping("change_position", axis=[-1, 0, 0], amplitude=0.1), + sm.ActionMapping("change_position", axis=[1, 0, 0], amplitude=0.1), + ] + actor.actuator = sm.Actuator(n=2, actuator_tag="actor_action", mapping=mapping) + + # create targets + projectiles = create_target_projectiles(index, 3) + + # add target terminals, if the agent gets hit by any of the projectiles, the episode should end + for projectile in projectiles: + actor += sm.RewardFunction(type="sparse", entity_a=projectile, entity_b=actor, scalar=-100.0, threshold=0.5, is_terminal=True) + + actor += sm.RewardFunction("timeout", scalar=1.0, threshold=200, is_terminal=True) + root += actor + + for projectile in projectiles: + root += projectile + + return root + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--build_exe", default="", type=str, required=False, help="Pre-built unity app for simulate") + parser.add_argument("--n_maps", default=1, type=int, required=False, help="Number of maps to spawn") + parser.add_argument("--n_show", default=1, type=int, required=False, help="Number of maps to show") + args = parser.parse_args() + + env = TakeCoverEnv(generate_map, args.n_maps, args.n_show, engine_exe=args.build_exe) + + model = PPO("MultiInputPolicy", env, verbose=1, n_epochs=1) + model.learn(total_timesteps=10000) + + print("LEARNT") + obs = env.reset() + plt.ion() + _, ax1 = plt.subplots(1, 1) + for i in range(4000): + action, _states = model.predict(obs) + obs, rewards, dones, info = env.step(action) + frame = np.flip(np.array(obs['actor_0_camera'][0], dtype=np.uint8).transpose(1, 2, 0), axis=0).astype(np.uint8) + ax1.clear() + ax1.imshow(frame) + plt.pause(0.1) + + env.close() \ No newline at end of file From b785cb7e31a1e8bdecba10d43fcc1ce4ce710a21 Mon Sep 17 00:00:00 2001 From: Shyam Sudhakaran Date: Wed, 5 Oct 2022 00:29:30 -0700 Subject: [PATCH 2/3] cleanup some commented out lines --- examples/rl/sb3_take_cover.py | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/examples/rl/sb3_take_cover.py b/examples/rl/sb3_take_cover.py index 727f13ff..9eecf5e5 100644 --- a/examples/rl/sb3_take_cover.py +++ b/examples/rl/sb3_take_cover.py @@ -42,9 +42,6 @@ CAMERA_WIDTH = 64 -def rgb2gray(rgb): - return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140]) - class TakeCoverEnv(sm.RLEnv): def __init__( self, @@ -68,8 +65,6 @@ def __init__( self.projectile_nodes = ["projectile_0_0", "projectile_1_0", "projectile_2_0"] self.projectile_actions = ["projectile_action_0", "projectile_action_1", "projectile_action_2"] - # self.projectile_action_acceleration_indices = 1 - self.projectile_position_control_indices = np.arange(2,9) def check_projectile_wall_collision(self, event): @@ -88,21 +83,6 @@ def check_projectile_wall_collision(self, event): return needs_reset, action_dict - def set_random_positions(self): - action_dict = {} - random_positions = list(np.random.choice(self.projectile_position_control_indices, 3, replace=False)) - for i, projectile in enumerate(self.projectile_nodes): - action_dict[self.projectile_actions[i]] = [ - [ - [int(random_positions[i])] - ] - ] - self.step_send_async( - action_dict - ) - self.scene.engine.step_recv_async() - - def reset(self) -> Dict: """ Resets the actors and the scene of the environment. @@ -144,11 +124,6 @@ def step(self, action: Union[Dict, List, np.ndarray]) -> Tuple[Dict, np.ndarray, done = self._convert_to_numpy(event["actor_done_buffer"]).flatten()[0:1] obs = self._squeeze_actor_dimension(obs) obs["actor_0_camera"] = obs["actor_0_camera"][0:1] - # obs = self._squeeze_actor_dimension(obs)['actor_0_camera'] - # print("REWARD", reward) - # print("DONE", done) - # obs = np.flip(np.array(obs, dtype=np.uint8).transpose(1, 2, 0), axis=0).astype(np.uint8) - # obs = rgb2gray(obs) needs_reset, projectile_reset_action_dict = self.check_projectile_wall_collision(event) @@ -180,8 +155,6 @@ def create_target_projectiles(index: int, num_projectiles: int = 3): # acceleration sm.ActionMapping("change_position", axis=[0, 0, -1], amplitude=0.15), - # sm.ActionMapping("change_position", axis=[0, 0, -1], amplitude=0.3), - # sm.ActionMapping("change_position", axis=[0, 0, -1], amplitude=0.1), # set positions sm.ActionMapping("set_position", position=[-3.0, 0.2, 4.0], use_local_coordinates=False), @@ -259,7 +232,7 @@ def generate_map(index): plt.ion() _, ax1 = plt.subplots(1, 1) for i in range(4000): - action, _states = model.predict(obs) + action, _ = model.predict(obs) obs, rewards, dones, info = env.step(action) frame = np.flip(np.array(obs['actor_0_camera'][0], dtype=np.uint8).transpose(1, 2, 0), axis=0).astype(np.uint8) ax1.clear() From 7c8e21e91fe0164144114bb83e161021510ef40b Mon Sep 17 00:00:00 2001 From: Shyam Sudhakaran Date: Wed, 5 Oct 2022 13:21:24 -0700 Subject: [PATCH 3/3] style fixes --- examples/rl/sb3_take_cover.py | 72 ++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/examples/rl/sb3_take_cover.py b/examples/rl/sb3_take_cover.py index 9eecf5e5..f6139d48 100644 --- a/examples/rl/sb3_take_cover.py +++ b/examples/rl/sb3_take_cover.py @@ -14,13 +14,14 @@ # Lint as: python3 -from turtle import left, right -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union import argparse +import itertools import random -import numpy as np +from turtle import left, right +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union + import matplotlib.pyplot as plt -import itertools +import numpy as np from simulate import logging @@ -58,28 +59,24 @@ def __init__( n_show=n_show, time_step=time_step, frame_skip=frame_skip, - **engine_kwargs + **engine_kwargs, ) self.action_tags = ["actor_action", "projectile_action_0", "projectile_action_1", "projectile_action_2"] self.projectile_nodes = ["projectile_0_0", "projectile_1_0", "projectile_2_0"] self.projectile_actions = ["projectile_action_0", "projectile_action_1", "projectile_action_2"] - self.projectile_position_control_indices = np.arange(2,9) + self.projectile_position_control_indices = np.arange(2, 9) def check_projectile_wall_collision(self, event): needs_reset = False - nodes = event['nodes'] + nodes = event["nodes"] action_dict = {} random_positions = list(np.random.choice(self.projectile_position_control_indices, 3, replace=False)) for i, projectile in enumerate(self.projectile_nodes): - if nodes[projectile]['position'][-1] <= -4.8: + if nodes[projectile]["position"][-1] <= -4.8: needs_reset = True - action_dict[self.projectile_actions[i]] = [ - [ - [int(random_positions[i])] - ] - ] + action_dict[self.projectile_actions[i]] = [[[int(random_positions[i])]]] return needs_reset, action_dict @@ -99,15 +96,14 @@ def reset(self) -> Dict: obs["actor_0_camera"] = obs["actor_0_camera"][0:1] return obs - def step(self, action: Union[Dict, List, np.ndarray]) -> Tuple[Dict, np.ndarray, np.ndarray, List[Dict]]: action_dict = { - "actor_action":[ - [ - [int(action[0])], - ], + "actor_action": [ + [ + [int(action[0])], ], - } + ], + } for projectile in self.projectile_actions: action_dict[projectile] = [ @@ -128,9 +124,7 @@ def step(self, action: Union[Dict, List, np.ndarray]) -> Tuple[Dict, np.ndarray, needs_reset, projectile_reset_action_dict = self.check_projectile_wall_collision(event) if needs_reset: - self.step_send_async( - projectile_reset_action_dict - ) + self.step_send_async(projectile_reset_action_dict) self.scene.engine.step_recv_async() return obs, reward, done, [{}] @@ -139,11 +133,11 @@ def step(self, action: Union[Dict, List, np.ndarray]) -> Tuple[Dict, np.ndarray, def create_target_projectiles(index: int, num_projectiles: int = 3): projectiles = [] for i in range(num_projectiles): - target_position = [0.5*i + 0.1, 0.2, 4.0] + target_position = [0.5 * i + 0.1, 0.2, 4.0] projectile = sm.Box( name=f"projectile_{i}_{index}", position=target_position, - bounds = (-0.1, 0.1, 0.1, 0.3, -0.1, 0.1), + bounds=(-0.1, 0.1, 0.1, 0.3, -0.1, 0.1), material=sm.Material.RED, is_actor=True, physics_component=sm.RigidBodyComponent(mass=0), @@ -152,10 +146,8 @@ def create_target_projectiles(index: int, num_projectiles: int = 3): projectile.physics_component.constraints = ["freeze_rotation_x", "freeze_rotation_z", "freeze_rotation_y"] mapping = [ sm.ActionMapping("do_nothing"), - # acceleration sm.ActionMapping("change_position", axis=[0, 0, -1], amplitude=0.15), - # set positions sm.ActionMapping("set_position", position=[-3.0, 0.2, 4.0], use_local_coordinates=False), sm.ActionMapping("set_position", position=[-2.0, 0.2, 4.0], use_local_coordinates=False), @@ -174,16 +166,21 @@ def generate_map(index): root = sm.Asset(name=f"root_{index}") floor = sm.Box(name=f"floor_{index}", position=[0, 0, 0], bounds=[-5, 5, 0, 0.1, -5, 5], material=sm.Material.BLUE) - right_wall = sm.Box(name=f"wall1_{index}", position=[-3.1, 0, 0], bounds=[0, 0.1, 0, 1, -5, 5], material=sm.Material.WHITE) - left_wall = sm.Box(name=f"wall2_{index}", position=[3.1, 0, 0], bounds=[0, 0.1, 0, 1, -5, 5], material=sm.Material.WHITE) - close_wall = sm.Box(name=f"wall4_{index}", position=[0, 0, -5], bounds=[-5, 5, 0, 1, 0, 0.1], material=sm.Material.WHITE) + right_wall = sm.Box( + name=f"wall1_{index}", position=[-3.1, 0, 0], bounds=[0, 0.1, 0, 1, -5, 5], material=sm.Material.WHITE + ) + left_wall = sm.Box( + name=f"wall2_{index}", position=[3.1, 0, 0], bounds=[0, 0.1, 0, 1, -5, 5], material=sm.Material.WHITE + ) + close_wall = sm.Box( + name=f"wall4_{index}", position=[0, 0, -5], bounds=[-5, 5, 0, 1, 0, 0.1], material=sm.Material.WHITE + ) root += floor root += right_wall root += left_wall root += close_wall - actor = sm.EgocentricCameraActor( name=f"actor_{index}", position=[0.0, 0.1, -4.0], @@ -191,7 +188,12 @@ def generate_map(index): ) actor.physics_component.mass = 0.0 - actor.physics_component.constraints = ["freeze_rotation_x", "freeze_rotation_z", "freeze_position_y", "freeze_position_z"] + actor.physics_component.constraints = [ + "freeze_rotation_x", + "freeze_rotation_z", + "freeze_position_y", + "freeze_position_z", + ] mapping = [ sm.ActionMapping("change_position", axis=[-1, 0, 0], amplitude=0.1), @@ -204,7 +206,9 @@ def generate_map(index): # add target terminals, if the agent gets hit by any of the projectiles, the episode should end for projectile in projectiles: - actor += sm.RewardFunction(type="sparse", entity_a=projectile, entity_b=actor, scalar=-100.0, threshold=0.5, is_terminal=True) + actor += sm.RewardFunction( + type="sparse", entity_a=projectile, entity_b=actor, scalar=-100.0, threshold=0.5, is_terminal=True + ) actor += sm.RewardFunction("timeout", scalar=1.0, threshold=200, is_terminal=True) root += actor @@ -234,9 +238,9 @@ def generate_map(index): for i in range(4000): action, _ = model.predict(obs) obs, rewards, dones, info = env.step(action) - frame = np.flip(np.array(obs['actor_0_camera'][0], dtype=np.uint8).transpose(1, 2, 0), axis=0).astype(np.uint8) + frame = np.flip(np.array(obs["actor_0_camera"][0], dtype=np.uint8).transpose(1, 2, 0), axis=0).astype(np.uint8) ax1.clear() ax1.imshow(frame) plt.pause(0.1) - env.close() \ No newline at end of file + env.close()