From 6ac1d6c4c238dc0ea84c3e6e25e8a1dc848153ff Mon Sep 17 00:00:00 2001 From: Carl Date: Wed, 20 Jul 2022 10:43:00 +0200 Subject: [PATCH] Add minigrid environment Update godot integration structure, reformat and docs Cleanup blender integration Make base env structure Add all the code from the minigrid repo Add script to create textures Update condition Add textures for minigrid --- environments/minigrid/.gitignore | 0 environments/minigrid/LICENCE | 201 +++ environments/minigrid/Makefile | 19 + environments/minigrid/README.md | 31 + .../minigrid/assets/textures/agent.png | Bin 0 -> 349 bytes .../minigrid/assets/textures/ball.png | Bin 0 -> 313 bytes environments/minigrid/assets/textures/box.png | Bin 0 -> 154 bytes .../minigrid/assets/textures/crossing.png | Bin 0 -> 138 bytes .../minigrid/assets/textures/door_closed.png | Bin 0 -> 177 bytes .../minigrid/assets/textures/door_locked.png | Bin 0 -> 174 bytes .../minigrid/assets/textures/door_opened.png | Bin 0 -> 145 bytes .../minigrid/assets/textures/grid_tile.png | Bin 0 -> 148 bytes environments/minigrid/assets/textures/key.png | Bin 0 -> 202 bytes .../minigrid/assets/textures/lava.png | Bin 0 -> 170 bytes environments/minigrid/setup.py | 94 ++ .../minigrid/src/minigrid/__init__.py | 1 + .../minigrid/src/minigrid/envs/__init__.py | 21 + .../src/minigrid/envs/blockedunlockpickup.py | 47 + .../minigrid/src/minigrid/envs/crossing.py | 121 ++ .../minigrid/src/minigrid/envs/distshift.py | 62 + .../minigrid/src/minigrid/envs/doorkey.py | 59 + .../src/minigrid/envs/dynamicobstacles.py | 114 ++ .../minigrid/src/minigrid/envs/empty.py | 68 + .../minigrid/src/minigrid/envs/fetch.py | 96 ++ .../minigrid/src/minigrid/envs/fourrooms.py | 69 + .../minigrid/src/minigrid/envs/gotodoor.py | 91 ++ .../minigrid/src/minigrid/envs/gotoobject.py | 89 ++ .../minigrid/src/minigrid/envs/keycorridor.py | 113 ++ .../minigrid/src/minigrid/envs/lavagap.py | 68 + .../minigrid/src/minigrid/envs/lockedroom.py | 119 ++ 
.../minigrid/src/minigrid/envs/memory.py | 130 ++ .../minigrid/src/minigrid/envs/multiroom.py | 264 ++++ .../src/minigrid/envs/obstructedmaze.py | 187 +++ .../src/minigrid/envs/playground_v0.py | 71 + .../minigrid/src/minigrid/envs/putnear.py | 117 ++ .../src/minigrid/envs/redbluedoors.py | 71 + .../minigrid/src/minigrid/envs/unlock.py | 40 + .../src/minigrid/envs/unlockpickup.py | 42 + .../minigrid/src/minigrid/minigrid.py | 1329 +++++++++++++++++ .../minigrid/src/minigrid/rendering.py | 126 ++ .../minigrid/src/minigrid/roomgrid.py | 403 +++++ .../minigrid/src/minigrid/simenv_minigrid.py | 53 + .../Blender/simenv_blender/__init__.py | 24 +- integrations/Blender/simenv_blender/client.py | 2 - .../Blender/simenv_blender/simenv_op.py | 1 - .../Blender/simenv_blender/simenv_pnl.py | 1 + .../Blender/simenv_blender/simulator.py | 2 - .../Godot/simenv-godot/Scenes/scene.tscn | 10 +- .../simenv-godot/SimEnv/Bridge/Client.gd | 19 +- .../simenv-godot/SimEnv/Bridge/Command.gd | 8 +- .../SimEnv/Commands/BuildScene.gd | 15 - .../simenv-godot/SimEnv/Commands/Close.gd | 2 + .../simenv-godot/SimEnv/Commands/GetDone.gd | 6 - .../SimEnv/Commands/GetObservation.gd | 6 - .../simenv-godot/SimEnv/Commands/GetReward.gd | 6 - .../simenv-godot/SimEnv/Commands/Reset.gd | 1 + .../simenv-godot/SimEnv/Commands/Step.gd | 3 + .../simenv-godot/SimEnv/Mods/ExampleMod.gd | 5 - .../simenv-godot/SimEnv/Simulation/Agent.gd | 66 - .../SimEnv/Simulation/RewardFunciton.gd | 11 - .../SimEnv/Simulation/SimAgentBase.gd | 11 - .../SimEnv/Simulation/SimObjectBase.gd | 11 - .../Godot/simenv-godot/SimEnv/Simulator.gd | 15 +- integrations/Godot/simenv-godot/project.godot | 20 +- 64 files changed, 4395 insertions(+), 166 deletions(-) create mode 100644 environments/minigrid/.gitignore create mode 100644 environments/minigrid/LICENCE create mode 100644 environments/minigrid/Makefile create mode 100644 environments/minigrid/README.md create mode 100644 environments/minigrid/assets/textures/agent.png create mode 
100644 environments/minigrid/assets/textures/ball.png create mode 100644 environments/minigrid/assets/textures/box.png create mode 100644 environments/minigrid/assets/textures/crossing.png create mode 100644 environments/minigrid/assets/textures/door_closed.png create mode 100644 environments/minigrid/assets/textures/door_locked.png create mode 100644 environments/minigrid/assets/textures/door_opened.png create mode 100644 environments/minigrid/assets/textures/grid_tile.png create mode 100644 environments/minigrid/assets/textures/key.png create mode 100644 environments/minigrid/assets/textures/lava.png create mode 100644 environments/minigrid/setup.py create mode 100644 environments/minigrid/src/minigrid/__init__.py create mode 100644 environments/minigrid/src/minigrid/envs/__init__.py create mode 100644 environments/minigrid/src/minigrid/envs/blockedunlockpickup.py create mode 100644 environments/minigrid/src/minigrid/envs/crossing.py create mode 100644 environments/minigrid/src/minigrid/envs/distshift.py create mode 100644 environments/minigrid/src/minigrid/envs/doorkey.py create mode 100644 environments/minigrid/src/minigrid/envs/dynamicobstacles.py create mode 100644 environments/minigrid/src/minigrid/envs/empty.py create mode 100644 environments/minigrid/src/minigrid/envs/fetch.py create mode 100644 environments/minigrid/src/minigrid/envs/fourrooms.py create mode 100644 environments/minigrid/src/minigrid/envs/gotodoor.py create mode 100644 environments/minigrid/src/minigrid/envs/gotoobject.py create mode 100644 environments/minigrid/src/minigrid/envs/keycorridor.py create mode 100644 environments/minigrid/src/minigrid/envs/lavagap.py create mode 100644 environments/minigrid/src/minigrid/envs/lockedroom.py create mode 100644 environments/minigrid/src/minigrid/envs/memory.py create mode 100644 environments/minigrid/src/minigrid/envs/multiroom.py create mode 100644 environments/minigrid/src/minigrid/envs/obstructedmaze.py create mode 100644 
environments/minigrid/src/minigrid/envs/playground_v0.py create mode 100644 environments/minigrid/src/minigrid/envs/putnear.py create mode 100644 environments/minigrid/src/minigrid/envs/redbluedoors.py create mode 100644 environments/minigrid/src/minigrid/envs/unlock.py create mode 100644 environments/minigrid/src/minigrid/envs/unlockpickup.py create mode 100644 environments/minigrid/src/minigrid/minigrid.py create mode 100644 environments/minigrid/src/minigrid/rendering.py create mode 100644 environments/minigrid/src/minigrid/roomgrid.py create mode 100644 environments/minigrid/src/minigrid/simenv_minigrid.py delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd delete mode 100644 integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd diff --git a/environments/minigrid/.gitignore b/environments/minigrid/.gitignore new file mode 100644 index 00000000..e69de29b diff --git a/environments/minigrid/LICENCE b/environments/minigrid/LICENCE new file mode 100644 index 00000000..a1a92b70 --- /dev/null +++ b/environments/minigrid/LICENCE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2019 Maxime Chevalier-Boisvert + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/environments/minigrid/Makefile b/environments/minigrid/Makefile new file mode 100644 index 00000000..79c54efc --- /dev/null +++ b/environments/minigrid/Makefile @@ -0,0 +1,19 @@ +.PHONY: quality style test + +# Check that source code meets quality standards + +quality: + black --check --line-length 119 --target-version py38 . + isort --check-only tests src + flake8 . + +# Format source code automatically + +style: + black --line-length 119 --target-version py38 . + isort . 
+ +# Run tests for the library + +test: + python -m pytest -n auto --dist=loadfile -s -v ./tests/ \ No newline at end of file diff --git a/environments/minigrid/README.md b/environments/minigrid/README.md new file mode 100644 index 00000000..a8f882cd --- /dev/null +++ b/environments/minigrid/README.md @@ -0,0 +1,31 @@ +# Minigrid-like environment + +Minimalistic gridworld environment inspired by [Minigrid](https://github.com/Farama-Foundation/gym-minigrid) + +## Installation +Create a virtual env, activate it, and then install `simenv`: + +``` +cd .. && git clone https://github.com/huggingface/simenv.git +cd simenv +pip install -e ".[dev]" +``` + +Then install the `minigrid` package: + +``` +cd environments/minigrid +pip install -e ".[dev]" +``` + +And it's done! + +### Style + +Before you merge a PR, fix the style (we use `isort` + `black`) +``` +make style +``` + +## Basic Usage + diff --git a/environments/minigrid/assets/textures/agent.png b/environments/minigrid/assets/textures/agent.png new file mode 100644 index 0000000000000000000000000000000000000000..e8f3482716891a61a623f68379c7fb8ef086edb3 GIT binary patch literal 349 zcmeAS@N?(olHy`uVBq!ia0vp^;y^6P!3-qd{&Rf`vv%fxB}^liV6b*ga7~kFJHb~K|$g7@8A9X{Q?34-@kuvXlUTz;CTN0 zc}`9a3k%Eb+qVM(0#2Vk?cm_BdGqG^^XE%QNW6dlJ|!jP`t|FF4Ealo5gdEbovGP?hc)oQW$zbn|B3f*Y*+YXHZ4ul-R;b8 z^}s`4rfH>Coh*7PxjEEBYew&;qPVM%xNb!1@J z*w6hZkrl`{4)6(a1=0!%3N|NmuVWEL-891|1s`Sa(3f`a|~_ct^& ztY5!gMMdTN_wOkwDGw7K1pu{i7I;J!GcfQS24TkI`72U@g8iN@jv*SsrIRAXoCO73 zZl5vU`**!ruT|5LzpM1VFfci3<@SUH9gv7fS37qorKaH~r<|E>f!g^Gwuf5lj^$Od zExa1}s;=;k`jbWl&Qb+WM_G+WmH!uVK5GAc$n@&ciX|+i6W%cgwcR;U6TVa_V#U(f z_uUx}bN~Ha_~Xp9wMq4hb!4B+t5*ujxv#$LV{M0BK_-{w>;-O4K<yy85}Sb4q9e E09%26Q~&?~ literal 0 HcmV?d00001 diff --git a/environments/minigrid/assets/textures/box.png b/environments/minigrid/assets/textures/box.png new file mode 100644 index 0000000000000000000000000000000000000000..d931b3151e904f2ea5d3cfa42b3b4cdc6eecc797 GIT binary patch literal 154 
zcmeAS@N?(olHy`uVBq!ia0vp^G9b*t3?v(r4=o2$jKx9jP7LeL$-D$|SkfJR9T^xl z_H+M9WCils0(?STf%O0X|CipJe;3H&EbxddW?!lvI6!lvI6;R0X`wF|Ns97GD8ntt^-n>1s;*b3=DjSL74G){)!Z!poyo8V~EE2!lvI6;>0X`wF|Ns97GK-3ey42*@L4pMyk;M!Qe1}1p@p%4<6riAi4rmb9>lw6!yw$oAnne5JU6v1f%#e@@7O0}Kp5R1L)CmoCW%>SXYA L^>bP0l+XkKWLz-G literal 0 HcmV?d00001 diff --git a/environments/minigrid/assets/textures/door_opened.png b/environments/minigrid/assets/textures/door_opened.png new file mode 100644 index 0000000000000000000000000000000000000000..180e02890ae5cff2c4a8ee135029eec47a7281bb GIT binary patch literal 145 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnF3?v&v(vJfv#^NA%Cx&(BWL^R}Ea{HEjtmSN z`?>!lvI6;R0X`wFK>Gjx|4VPqzYAn>7I;J!GcfQS24TkI`72U@f+C(Sjv*T7lO0%l idKNP+W-2&vf`h>&fx+=oe&9-=JcFmJpUXO@geCy1c_z#N literal 0 HcmV?d00001 diff --git a/environments/minigrid/assets/textures/grid_tile.png b/environments/minigrid/assets/textures/grid_tile.png new file mode 100644 index 0000000000000000000000000000000000000000..8a1e2d91c6063ca80e32e98629f70d68a324c281 GIT binary patch literal 148 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnF3?v&v(vJfv#^NA%Cx&(BWL^R}Ea{HEjtmSN z`?>!lvI6;R0X`wFDJdyH=DRPhQ9z2bz$3Dlfr0NZ2s0kfUjdR6_jGX#(Kw%cfT5>J k!yzFgL4|Vx6B`@Dwnws$qpvtV0V-keboFyt=akR{0DovD&j0`b literal 0 HcmV?d00001 diff --git a/environments/minigrid/assets/textures/key.png b/environments/minigrid/assets/textures/key.png new file mode 100644 index 0000000000000000000000000000000000000000..d58b3a3b4295c1b80e970de31df1fbb1b0b60521 GIT binary patch literal 202 zcmeAS@N?(olHy`uVBq!ia0vp^JU}eP!VDw}uPguwFct^7J29*~C-V}>VM%xNb!1@J z*w6hZkrl}23GfMV1=4%=?0NX`;s5{tqn{j60rEKuJR*x382Ao@Fyrz36)8YLcTX3` z5Q)pl2?+@zDGZFM$*c+|Q*v@x9%*oCX&V?Y&UDbp;Rs3TadUjSKr8Xrf@KCA(%cUg qNU%!lvI6;R0X`wF{~LfTh7YcbCITtW0*}aI1_r*vAk26?e?|GrH3GcT#9BT!DhY6D*=MYkRX9J9LqdX~J&kFG@Ux|fAX7bE{an^L HB{Ts5@Y5@U literal 0 HcmV?d00001 diff --git a/environments/minigrid/setup.py b/environments/minigrid/setup.py new file mode 100644 index 00000000..3cd825fa --- /dev/null +++ b/environments/minigrid/setup.py 
@@ -0,0 +1,94 @@ +# Lint as: python3 +""" HuggingFace/minigrid is a simple gridworld environment for RL. + +Note: + + VERSION needs to be formatted following the MAJOR.MINOR.PATCH convention + (we need to follow this convention to be able to retrieve versioned scripts) + +Simple check list for release from AllenNLP repo: https://github.com/allenai/allennlp/blob/main/setup.py + +To create the package for pypi. + +0. Prerequisites: + - Dependencies: + - twine: "pip install twine" + - Create an account in (and join the 'simenv' project): + - PyPI: https://pypi.org/ + - Test PyPI: https://test.pypi.org/ + +1. Change the version in: + - __init__.py + - setup.py + +2. Commit these changes: "git commit -m 'Release: VERSION'" + +3. Add a tag in git to mark the release: "git tag VERSION -m 'Add tag VERSION for pypi'" + Push the tag to remote: git push --tags origin main + +4. Build both the sources and the wheel. Do not change anything in setup.py between + creating the wheel and the source distribution (obviously). + + First, delete any "build" directory that may exist from previous builds. + + For the wheel, run: "python setup.py bdist_wheel" in the top level directory. + (this will build a wheel for the python version you use to build it). + + For the sources, run: "python setup.py sdist" + You should now have a /dist directory with both .whl and .tar.gz source versions. + +5. Check that everything looks correct by uploading the package to the pypi test server: + + twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/ + + Check that you can install it in a virtualenv/notebook by running: + pip install -i https://testpypi.python.org/pypi simenv + +6. Upload the final version to actual pypi: + twine upload dist/* -r pypi + +7. Fill release notes in the tag in github once everything is looking hunky-dory. + +8. Change the version in __init__.py and setup.py to X.X.X+1.dev0 (e.g. VERSION=1.18.3 -> 1.18.4.dev0). 
+ Then push the change with a message 'set dev version' +""" + +import os +import sys + +from setuptools import find_packages, setup + + +REQUIRED_PKGS = [ + "dataclasses_json", # For GLTF export/imports + "numpy>=1.17", # We use numpy>=1.17 to have np.random.Generator + "simenv", +] + +QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"] + +TESTS_REQUIRE = [ + # test dependencies +] + +EXTRAS_REQUIRE = { + "dev": TESTS_REQUIRE + QUALITY_REQUIRE, + "tests": TESTS_REQUIRE, + "quality": QUALITY_REQUIRE, +} + +setup( + name="minigrid", + description="HuggingFace simple gridworld environment for RL.", + long_description=open("README.md", encoding="utf-8").read(), + long_description_content_type="text/markdown", + author="HuggingFace Inc.", + author_email="carl@huggingface.co", + license="Apache 2.0", + version="0.0.1.dev0", + package_dir={"": "src"}, + packages=find_packages("src"), + install_requires=REQUIRED_PKGS, + extras_require=EXTRAS_REQUIRE, + keywords="simulation environments grid world reinforcement machine learning", +) diff --git a/environments/minigrid/src/minigrid/__init__.py b/environments/minigrid/src/minigrid/__init__.py new file mode 100644 index 00000000..22a9f2e0 --- /dev/null +++ b/environments/minigrid/src/minigrid/__init__.py @@ -0,0 +1 @@ +import minigrid.envs \ No newline at end of file diff --git a/environments/minigrid/src/minigrid/envs/__init__.py b/environments/minigrid/src/minigrid/envs/__init__.py new file mode 100644 index 00000000..078b90f5 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/__init__.py @@ -0,0 +1,21 @@ +from minigrid.envs.empty import * +from minigrid.envs.doorkey import * +from minigrid.envs.multiroom import * +from minigrid.envs.fetch import * +from minigrid.envs.gotoobject import * +from minigrid.envs.gotodoor import * +from minigrid.envs.putnear import * +from minigrid.envs.lockedroom import * +from minigrid.envs.keycorridor import * +from minigrid.envs.unlock import * +from 
minigrid.envs.unlockpickup import * +from minigrid.envs.blockedunlockpickup import * +from minigrid.envs.playground_v0 import * +from minigrid.envs.redbluedoors import * +from minigrid.envs.obstructedmaze import * +from minigrid.envs.memory import * +from minigrid.envs.fourrooms import * +from minigrid.envs.crossing import * +from minigrid.envs.lavagap import * +from minigrid.envs.dynamicobstacles import * +from minigrid.envs.distshift import * \ No newline at end of file diff --git a/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py b/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py new file mode 100644 index 00000000..9d4303d7 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py @@ -0,0 +1,47 @@ +from minigrid.minigrid import Ball +from minigrid.roomgrid import RoomGrid + + +class BlockedUnlockPickup(RoomGrid): + """ + Unlock a door blocked by a ball, then pick up a box + in another room + """ + + def __init__(self, seed=None): + room_size = 6 + super().__init__( + num_rows=1, + num_cols=2, + room_size=room_size, + max_steps=16*room_size**2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Add a box to the room on the right + obj, _ = self.add_object(1, 0, kind="box") + # Make sure the two rooms are directly connected by a locked door + door, pos = self.add_door(0, 0, 0, locked=True) + # Block the door with a ball + color = self._rand_color() + self.grid.set(pos[0]-1, pos[1], Ball(color)) + # Add a key to unlock the door + self.add_object(0, 0, 'key', door.color) + + self.place_agent(0, 0) + + self.obj = obj + self.mission = "pick up the %s %s" % (obj.color, obj.type) + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info diff --git 
a/environments/minigrid/src/minigrid/envs/crossing.py b/environments/minigrid/src/minigrid/envs/crossing.py new file mode 100644 index 00000000..2c319186 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/crossing.py @@ -0,0 +1,121 @@ +from minigrid.minigrid import * +import itertools as itt + + +class CrossingEnv(MiniGridEnv): + """ + Environment with wall or lava obstacles, sparse reward. + """ + + def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None): + self.num_crossings = num_crossings + self.obstacle_type = obstacle_type + super().__init__( + grid_size=size, + max_steps=4*size*size, + # Set this to True for maximum speed + see_through_walls=False, + seed=None + ) + + def _gen_grid(self, width, height): + assert width % 2 == 1 and height % 2 == 1 # odd size + + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Place the agent in the top-left corner + self.agent_pos = (1, 1) + self.agent_dir = 0 + + # Place a goal square in the bottom-right corner + self.put_obj(Goal(), width - 2, height - 2) + + # Place obstacles (lava or walls) + v, h = object(), object() # singleton `vertical` and `horizontal` objects + + # Lava rivers or walls specified by direction and position in grid + rivers = [(v, i) for i in range(2, height - 2, 2)] + rivers += [(h, j) for j in range(2, width - 2, 2)] + self.np_random.shuffle(rivers) + rivers = rivers[:self.num_crossings] # sample random rivers + rivers_v = sorted([pos for direction, pos in rivers if direction is v]) + rivers_h = sorted([pos for direction, pos in rivers if direction is h]) + obstacle_pos = itt.chain( + itt.product(range(1, width - 1), rivers_h), + itt.product(rivers_v, range(1, height - 1)), + ) + for i, j in obstacle_pos: + self.put_obj(self.obstacle_type(), i, j) + + # Sample path to goal + path = [h] * len(rivers_v) + [v] * len(rivers_h) + self.np_random.shuffle(path) + + # Create openings + 
limits_v = [0] + rivers_v + [height - 1] + limits_h = [0] + rivers_h + [width - 1] + room_i, room_j = 0, 0 + for direction in path: + if direction is h: + i = limits_v[room_i + 1] + j = self.np_random.choice( + range(limits_h[room_j] + 1, limits_h[room_j + 1])) + room_i += 1 + elif direction is v: + i = self.np_random.choice( + range(limits_v[room_i] + 1, limits_v[room_i + 1])) + j = limits_h[room_j + 1] + room_j += 1 + else: + assert False + self.grid.set(i, j, None) + + self.mission = ( + "avoid the lava and get to the green goal square" + if self.obstacle_type == Lava + else "find the opening and get to the green goal square" + ) + + +class LavaCrossingEnv(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=1) + + +class LavaCrossingS9N2Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=2) + + +class LavaCrossingS9N3Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=3) + + +class LavaCrossingS11N5Env(CrossingEnv): + def __init__(self): + super().__init__(size=11, num_crossings=5) + + +class SimpleCrossingEnv(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=1, obstacle_type=Wall) + + +class SimpleCrossingS9N2Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=2, obstacle_type=Wall) + + +class SimpleCrossingS9N3Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=3, obstacle_type=Wall) + + +class SimpleCrossingS11N5Env(CrossingEnv): + def __init__(self): + super().__init__(size=11, num_crossings=5, obstacle_type=Wall) diff --git a/environments/minigrid/src/minigrid/envs/distshift.py b/environments/minigrid/src/minigrid/envs/distshift.py new file mode 100644 index 00000000..31d7c47d --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/distshift.py @@ -0,0 +1,62 @@ +from minigrid.minigrid import * + + +class DistShiftEnv(MiniGridEnv): + """ + Distributional shift environment. 
def _gen_grid(self, width, height):
    """
    Build the walled room with its goal, the two lava strips and the agent.

    The goal is put at ``self.goal_pos`` and the second lava strip in row
    ``self.strip2_row``; both values are stored by ``__init__``.
    """
    # Fresh grid enclosed by a wall on every side
    room = Grid(width, height)
    self.grid = room
    room.wall_rect(0, 0, width, height)

    # The goal goes where __init__ decided: (width - 2, 1)
    self.put_obj(Goal(), *self.goal_pos)

    # Two horizontal lava strips starting at column 3: one in row 1 and
    # one in row `strip2_row`
    for col in range(3, self.width - 3):
        room.set(col, 1, Lava())
        room.set(col, self.strip2_row, Lava())

    # Use the fixed start pose when one was given, otherwise sample one
    if self.agent_start_pos is None:
        self.place_agent()
    else:
        self.agent_pos = self.agent_start_pos
        self.agent_dir = self.agent_start_dir

    self.mission = "get to the green goal square"
class DynamicObstaclesEnv(MiniGridEnv):
    """
    Single-room square grid environment with moving obstacles.

    Each step every obstacle is re-placed somewhere in the 3x3 window
    around its current cell. Moving forward into a non-empty cell other
    than the goal ends the episode with a reward of -1.

    :param size: passed to the base class as ``grid_size``
    :param agent_start_pos: fixed start cell, or None for a random one
    :param agent_start_dir: start direction used with a fixed start cell
    :param n_obstacles: requested number of obstacles; capped at
        ``int(size/2)`` when it exceeds ``size/2 + 1``
    """

    def __init__(
            self,
            size=8,
            agent_start_pos=(1, 1),
            agent_start_dir=0,
            n_obstacles=4
    ):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        # Reduce obstacles if there are too many
        if n_obstacles <= size/2 + 1:
            self.n_obstacles = int(n_obstacles)
        else:
            self.n_obstacles = int(size/2)
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
        )
        # Only 3 actions are permitted: left, right, forward
        self.action_space = spaces.Discrete(self.actions.forward + 1)
        self.reward_range = (-1, 1)

    def _gen_grid(self, width, height):
        """
        Build the walled room, then place goal, agent and obstacles.
        """
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        self.grid.set(width - 2, height - 2, Goal())

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        # Place obstacles
        self.obstacles = []
        for _ in range(self.n_obstacles):
            obstacle = Ball()
            self.obstacles.append(obstacle)
            self.place_obj(obstacle, max_tries=100)

        self.mission = "get to the green goal square"

    def step(self, action):
        """
        Move the obstacles, then apply the agent's action.

        :return: ``(obs, reward, done, info)`` from the base class, with
            ``reward = -1`` and ``done = True`` when the agent moved
            forward while the cell in front held something other than
            the goal
        """
        # Invalid action
        if action >= self.action_space.n:
            action = 0

        # Check if there is an obstacle in front of the agent.
        # This is evaluated BEFORE the obstacles move.
        front_cell = self.grid.get(*self.front_pos)
        not_clear = front_cell and front_cell.type != 'goal'

        # Update obstacle positions
        for obstacle in self.obstacles:
            old_pos = obstacle.cur_pos
            # Top-left corner of the 3x3 window centred on the obstacle
            top = tuple(map(add, old_pos, (-1, -1)))

            try:
                self.place_obj(obstacle, top=top, size=(3, 3), max_tries=100)
                self.grid.set(*old_pos, None)
            except Exception:
                # Best effort: if no new cell could be found the obstacle
                # simply stays where it is for this step. Unlike a bare
                # `except:`, this lets KeyboardInterrupt/SystemExit through.
                pass

        # Update the agent's position/direction
        obs, reward, done, info = MiniGridEnv.step(self, action)

        # If the agent tried to walk over an obstacle or wall
        if action == self.actions.forward and not_clear:
            reward = -1
            done = True

        return obs, reward, done, info
class EmptyEnv(MiniGridEnv):
    """
    Empty grid environment, no obstacles, sparse reward.

    The room only contains the surrounding walls, a goal next to the
    bottom-right corner and the agent.
    """

    def __init__(
        self,
        size=8,
        agent_start_pos=(1,1),
        agent_start_dir=0,
    ):
        # Remember the requested start pose; _gen_grid reads it
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        step_budget = 4*size*size
        super().__init__(
            grid_size=size,
            max_steps=step_budget,
            # Set this to True for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        """
        Build the walled room and place the goal and the agent.
        """
        # Empty grid framed by walls
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Goal square next to the bottom-right corner
        goal_x, goal_y = width - 2, height - 2
        self.put_obj(Goal(), goal_x, goal_y)

        # Random start unless a fixed position was requested
        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        self.mission = "get to the green goal square"
def _gen_grid(self, width, height):
    """
    Build the room, scatter ``self.numObjs`` random keys/balls, place the
    agent and pick one of the objects as the fetch target.

    Sets ``targetType``, ``targetColor`` and a randomly phrased
    ``mission``.
    """
    self.grid = Grid(width, height)

    # Surrounding walls: top, bottom, left, right
    self.grid.horz_wall(0, 0)
    self.grid.horz_wall(0, height-1)
    self.grid.vert_wall(0, 0)
    self.grid.vert_wall(width-1, 0)

    types = ['key', 'ball']
    constructors = {'key': Key, 'ball': Ball}

    # Keep creating objects until the requested number exists
    placed = []
    while len(placed) < self.numObjs:
        kind = self._rand_elem(types)
        color = self._rand_elem(COLOR_NAMES)
        item = constructors[kind](color)
        self.place_obj(item)
        placed.append(item)

    # Randomize the player start position and orientation
    self.place_agent()

    # Choose a random object to be picked up
    target = placed[self._rand_int(0, len(placed))]
    self.targetType = target.type
    self.targetColor = target.color

    descStr = '%s %s' % (self.targetColor, self.targetType)

    # One of five equivalent phrasings of the mission
    phrasings = (
        'get a %s',
        'go get a %s',
        'fetch a %s',
        'go fetch a %s',
        'you must fetch a %s',
    )
    idx = self._rand_int(0, 5)
    self.mission = phrasings[idx] % descStr
    assert hasattr(self, 'mission')
class FourRoomsEnv(MiniGridEnv):
    """
    Classic 4 rooms gridworld environment.
    The agent and goal positions can be specified; any position that is
    not given is chosen at random.

    :param agent_pos: fixed agent start cell, or None for a random one
    :param goal_pos: fixed goal cell, or None for a random one
    """

    def __init__(self, agent_pos=None, goal_pos=None):
        self._agent_default_pos = agent_pos
        self._goal_default_pos = goal_pos
        super().__init__(grid_size=19, max_steps=100)

    def _gen_grid(self, width, height):
        """
        Build the 2x2 arrangement of rooms, open one random gap in every
        interior wall segment and place the agent and the goal.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):

            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall and door (only for the left column)
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)
                    pos = (xR, self._rand_int(yT + 1, yB))
                    self.grid.set(*pos, None)

                # Bottom wall and door (only for the top row)
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)
                    pos = (self._rand_int(xL + 1, xR), yB)
                    self.grid.set(*pos, None)

        # Randomize the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            # Make sure the requested start cell is free
            self.grid.set(*self._agent_default_pos, None)
            self.agent_dir = self._rand_int(0, 4)  # assuming random start direction
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.put_obj(goal, *self._goal_default_pos)
            # Both attributes hold the full position. The former tuple
            # unpacking assigned x to init_pos and y to cur_pos.
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'

    def step(self, action):
        """
        Delegate to ``MiniGridEnv.step`` without changing the result.
        """
        obs, reward, done, info = MiniGridEnv.step(self, action)
        return obs, reward, done, info
class GoToObjectEnv(MiniGridEnv):
    """
    Environment in which the agent is instructed to go to a given object
    named using an English text string.

    The episode is rewarded when the ``done`` action is performed within
    one cell (diagonals included) of the target object.
    """

    def __init__(
        self,
        size=6,
        numObjs=2
    ):
        self.numObjs = numObjs

        step_budget = 5*size**2
        super().__init__(
            grid_size=size,
            max_steps=step_budget,
            # Set this to True for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        """
        Build the room, place ``numObjs`` distinct (type, color) objects
        and the agent, then choose the target and the mission text.
        """
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Object types we can generate and how to build each of them
        types = ['key', 'ball', 'box']
        constructors = {'key': Key, 'ball': Ball, 'box': Box}

        combos = []      # (type, color) of every placed object
        locations = []   # matching positions returned by place_obj

        # Until we have generated all the objects
        while len(combos) < self.numObjs:
            kind = self._rand_elem(types)
            color = self._rand_elem(COLOR_NAMES)
            combo = (kind, color)

            # Every (type, color) pair may appear only once
            if combo in combos:
                continue

            where = self.place_obj(constructors[kind](color))
            combos.append(combo)
            locations.append(where)

        # Randomize the agent start position and orientation
        self.place_agent()

        # Choose a random object as the target
        chosen = self._rand_int(0, len(combos))
        self.targetType, self.target_color = combos[chosen]
        self.target_pos = locations[chosen]

        descStr = '%s %s' % (self.target_color, self.targetType)
        self.mission = 'go to the %s' % descStr

    def step(self, action):
        """
        Run the base step, then apply this task's termination rules.
        """
        obs, reward, done, info = MiniGridEnv.step(self, action)

        ax, ay = self.agent_pos
        tx, ty = self.target_pos
        next_to_target = abs(ax - tx) <= 1 and abs(ay - ty) <= 1

        # The toggle action terminates the episode
        if action == self.actions.toggle:
            done = True

        # Reward performing the done action next to the target object
        if action == self.actions.done and next_to_target:
            reward = self._reward()
            done = True

        return obs, reward, done, info
def _gen_grid(self, width, height):
    """
    Build the corridor layout on top of the base room grid.

    Stores the object to pick up in ``self.obj`` and sets ``self.mission``.
    The first two arguments of the room helpers are presumably
    (column, row) -- confirm against RoomGrid.
    """
    super()._gen_grid(width, height)

    # Connect the middle column rooms into a hallway
    for row in range(1, self.num_rows):
        self.remove_wall(1, row, 3)

    # A random room of column 2 gets the locked door and, behind it,
    # the object to pick up
    target_row = self._rand_int(0, self.num_rows)
    locked_door, _ = self.add_door(2, target_row, 2, locked=True)
    target, _ = self.add_object(2, target_row, kind=self.obj_type)

    # The key matching the door color goes into a random room of column 0
    key_row = self._rand_int(0, self.num_rows)
    self.add_object(0, key_row, 'key', locked_door.color)

    # The agent starts in the middle room of column 1
    middle_row = self.num_rows // 2
    self.place_agent(1, middle_row)

    # Make sure all rooms are accessible
    self.connect_all()

    self.obj = target
    self.mission = "pick up the %s %s" % (target.color, target.type)
class LavaGapEnv(MiniGridEnv):
    """
    Environment with one wall of lava with a small gap to cross through.
    This environment is similar to LavaCrossing but simpler in structure.

    :param size: passed to the base class as ``grid_size``; ``_gen_grid``
        asserts that width and height are at least 5
    :param obstacle_type: class used for the cells of the obstacle wall
    :param seed: seed forwarded to the base class
    """

    def __init__(self, size, obstacle_type=Lava, seed=None):
        self.obstacle_type = obstacle_type
        super().__init__(
            grid_size=size,
            max_steps=4*size*size,
            # Set this to True for maximum speed
            see_through_walls=False,
            # Forward the caller's seed (it used to be replaced by None,
            # which made the `seed` argument a no-op)
            seed=seed
        )

    def _gen_grid(self, width, height):
        """
        Build the room with a vertical obstacle wall that has one gap.

        Stores the goal cell in ``self.goal_pos`` and the gap cell in
        ``self.gap_pos`` (both numpy arrays).
        """
        assert width >= 5 and height >= 5

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.goal_pos = np.array((width - 2, height - 2))
        self.put_obj(Goal(), *self.goal_pos)

        # Generate and store random gap position
        self.gap_pos = np.array((
            self._rand_int(2, width - 2),
            self._rand_int(1, height - 1),
        ))

        # Place the obstacle wall in the gap's column
        self.grid.vert_wall(self.gap_pos[0], 1, height - 2, self.obstacle_type)

        # Put a hole in the wall
        self.grid.set(*self.gap_pos, None)

        self.mission = (
            "avoid the lava and get to the green goal square"
            if self.obstacle_type == Lava
            else "find the opening and get to the green goal square"
        )
class Room:
    """
    Plain record describing one room: its top-left corner, its size and
    the position of its door. ``color`` and ``locked`` start out as
    ``None`` / ``False`` and are filled in by the code using the room.
    """

    def __init__(self,
        top,
        size,
        doorPos
    ):
        self.top, self.size, self.doorPos = top, size, doorPos
        self.color, self.locked = None, False

    def rand_pos(self, env):
        """
        Ask ``env._rand_pos`` for a position, passing bounds that leave
        out the room's first and last row/column, and return its result.
        """
        (left, upper), (span_x, span_y) = self.top, self.size
        x_bounds = (left + 1, left + span_x - 1)
        y_bounds = (upper + 1, upper + span_y - 1)
        return env._rand_pos(*x_bounds, *y_bounds)
class MemoryEnv(MiniGridEnv):
    """
    This environment is a memory test. The agent starts in a small room
    where it sees an object. It then has to go through a narrow hallway
    which ends in a split. At each end of the split there is an object,
    one of which is the same as the object in the starting room. The
    agent has to remember the initial object, and go to the matching
    object at the split.

    ``_gen_grid`` asserts that the grid height is odd.
    NOTE(review): the default ``size=8`` is even, so it presumably trips
    that assertion -- confirm; every subclass passes an odd size.
    """

    def __init__(
        self,
        seed,
        size=8,
        random_length=False,
    ):
        self.random_length = random_length
        step_budget = 5*size**2
        super().__init__(
            seed=seed,
            grid_size=size,
            max_steps=step_budget,
            # Set this to True for maximum speed
            see_through_walls=False,
        )

    def _gen_grid(self, width, height):
        """
        Build the start room, the hallway and the split with its two
        objects; store ``success_pos`` / ``failure_pos`` for ``step``.
        """
        grid = Grid(width, height)
        self.grid = grid

        # Generate the surrounding walls
        grid.horz_wall(0, 0)
        grid.horz_wall(0, height-1)
        grid.vert_wall(0, 0)
        grid.vert_wall(width - 1, 0)

        assert height % 2 == 1
        mid = height // 2
        upper_room_wall = mid - 2
        lower_room_wall = mid + 2
        # Column of the wall that closes the hallway
        hallway_end = (
            self._rand_int(4, width - 2) if self.random_length else width - 3
        )

        # Start room: top and bottom walls, then the two cells that
        # narrow its right side down to the hallway
        for col in range(1, 5):
            grid.set(col, upper_room_wall, Wall())
            grid.set(col, lower_room_wall, Wall())
        grid.set(4, upper_room_wall + 1, Wall())
        grid.set(4, lower_room_wall - 1, Wall())

        # Horizontal hallway
        for col in range(5, hallway_end):
            grid.set(col, upper_room_wall + 1, Wall())
            grid.set(col, lower_room_wall - 1, Wall())

        # Vertical hallway: the near wall keeps an opening in the middle
        # row, the far wall is solid
        for row in range(0, height):
            if row != mid:
                grid.set(hallway_end, row, Wall())
            grid.set(hallway_end + 2, row, Wall())

        # Random column in the middle row, facing direction 0
        self.agent_pos = (self._rand_int(1, hallway_end + 1), mid)
        self.agent_dir = 0

        # Object shown in the start room
        shown_cls = self._rand_elem([Key, Ball])
        grid.set(1, mid - 1, shown_cls('green'))

        # Objects at the two ends of the split
        split_classes = self._rand_elem([[Ball, Key], [Key, Ball]])
        top_end = (hallway_end + 1, mid - 2)
        bottom_end = (hallway_end + 1, mid + 2)
        grid.set(*top_end, split_classes[0]('green'))
        grid.set(*bottom_end, split_classes[1]('green'))

        # The cells right in front of each object end the episode
        before_top = (top_end[0], top_end[1] + 1)
        before_bottom = (bottom_end[0], bottom_end[1] - 1)
        if shown_cls == split_classes[0]:
            self.success_pos, self.failure_pos = before_top, before_bottom
        else:
            self.success_pos, self.failure_pos = before_bottom, before_top

        self.mission = 'go to the matching object at the end of the hallway'

    def step(self, action):
        """
        Run the base step (with pickup turned into toggle) and end the
        episode when the agent reaches the success or failure cell.
        """
        # Pickup is replaced by toggle before the base class sees it
        if action == MiniGridEnv.Actions.pickup:
            action = MiniGridEnv.Actions.toggle
        obs, reward, done, info = MiniGridEnv.step(self, action)

        here = tuple(self.agent_pos)
        if here == self.success_pos:
            reward = self._reward()
            done = True
        if here == self.failure_pos:
            reward = 0
            done = True

        return obs, reward, done, info
def _gen_grid(self, width, height):
    """
    Generate a chain of rooms, draw them into a fresh grid, connect
    consecutive rooms with doors and place the agent and the goal.

    Room layouts are sampled repeatedly until one contains the requested
    number of rooms; the longest layout seen so far is kept. The agent is
    placed in the first room and the goal in the last one.
    """
    roomList = []

    # Choose a random number of rooms to generate
    numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms+1)

    while len(roomList) < numRooms:
        curRoomList = []

        # x is bounded by the width and y by the height (y used to be
        # drawn from the width as well, which only works on square grids)
        entryDoorPos = (
            self._rand_int(0, width - 2),
            self._rand_int(0, height - 2)
        )

        # Recursively place the rooms
        self._placeRoom(
            numRooms,
            roomList=curRoomList,
            minSz=4,
            maxSz=self.maxRoomSize,
            entryDoorWall=2,
            entryDoorPos=entryDoorPos
        )

        # Keep the attempt that managed to place the most rooms
        if len(curRoomList) > len(roomList):
            roomList = curRoomList

    # Store the list of rooms in this environment
    assert len(roomList) > 0
    self.rooms = roomList

    # Create the grid
    self.grid = Grid(width, height)
    # A single Wall instance is shared by every wall cell
    wall = Wall()

    prevDoorColor = None

    # For each room
    for idx, room in enumerate(roomList):

        topX, topY = room.top
        sizeX, sizeY = room.size

        # Draw the top and bottom walls
        for i in range(0, sizeX):
            self.grid.set(topX + i, topY, wall)
            self.grid.set(topX + i, topY + sizeY - 1, wall)

        # Draw the left and right walls
        for j in range(0, sizeY):
            self.grid.set(topX, topY + j, wall)
            self.grid.set(topX + sizeX - 1, topY + j, wall)

        # If this isn't the first room, place the entry door
        if idx > 0:
            # Pick a door color different from the previous one
            doorColors = set(COLOR_NAMES)
            if prevDoorColor:
                doorColors.remove(prevDoorColor)
            # Note: the use of sorting here guarantees determinism,
            # This is needed because Python's set is not deterministic
            doorColor = self._rand_elem(sorted(doorColors))

            entryDoor = Door(doorColor)
            self.grid.set(*room.entryDoorPos, entryDoor)
            prevDoorColor = doorColor

            # This room's entry door is the previous room's exit door
            prevRoom = roomList[idx-1]
            prevRoom.exitDoorPos = room.entryDoorPos

    # Randomize the starting agent position and direction
    self.place_agent(roomList[0].top, roomList[0].size)

    # Place the final goal in the last room
    self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)

    self.mission = 'traverse the rooms to get to the goal'
4 + + # Pick the exit door position + # Exit on right wall + if exitDoorWall == 0: + exitDoorPos = ( + topX + sizeX - 1, + topY + self._rand_int(1, sizeY - 1) + ) + # Exit on south wall + elif exitDoorWall == 1: + exitDoorPos = ( + topX + self._rand_int(1, sizeX - 1), + topY + sizeY - 1 + ) + # Exit on left wall + elif exitDoorWall == 2: + exitDoorPos = ( + topX, + topY + self._rand_int(1, sizeY - 1) + ) + # Exit on north wall + elif exitDoorWall == 3: + exitDoorPos = ( + topX + self._rand_int(1, sizeX - 1), + topY + ) + else: + assert False + + # Recursively create the other rooms + success = self._placeRoom( + numLeft - 1, + roomList=roomList, + minSz=minSz, + maxSz=maxSz, + entryDoorWall=nextEntryWall, + entryDoorPos=exitDoorPos + ) + + if success: + break + + return True + + +class MultiRoomEnvN2S4(MultiRoomEnv): + def __init__(self): + super().__init__( + minNumRooms=2, + maxNumRooms=2, + maxRoomSize=4 + ) + + +class MultiRoomEnvN4S5(MultiRoomEnv): + def __init__(self): + super().__init__( + minNumRooms=4, + maxNumRooms=4, + maxRoomSize=5 + ) + + +class MultiRoomEnvN6(MultiRoomEnv): + def __init__(self): + super().__init__( + minNumRooms=6, + maxNumRooms=6 + ) diff --git a/environments/minigrid/src/minigrid/envs/obstructedmaze.py b/environments/minigrid/src/minigrid/envs/obstructedmaze.py new file mode 100644 index 00000000..fec12421 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/obstructedmaze.py @@ -0,0 +1,187 @@ +from minigrid.minigrid import * +from minigrid.roomgrid import RoomGrid + + +class ObstructedMazeEnv(RoomGrid): + """ + A blue ball is hidden in the maze. Doors may be locked, + doors may be obstructed by a ball and keys may be hidden in boxes. 
+ """ + + def __init__(self, + num_rows, + num_cols, + num_rooms_visited, + seed=None + ): + room_size = 6 + max_steps = 4 * num_rooms_visited * room_size ** 2 + + super().__init__( + room_size=room_size, + num_rows=num_rows, + num_cols=num_cols, + max_steps=max_steps, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Define all possible colors for doors + self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES)) + # Define the color of the ball to pick up + self.ball_to_find_color = COLOR_NAMES[0] + # Define the color of the balls that obstruct doors + self.blocking_ball_color = COLOR_NAMES[1] + # Define the color of boxes in which keys are hidden + self.box_color = COLOR_NAMES[2] + + self.mission = "pick up the %s ball" % self.ball_to_find_color + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info + + def add_door(self, i, j, door_idx=0, color=None, locked=False, key_in_box=False, blocked=False): + """ + Add a door. If the door must be locked, it also adds the key. + If the key must be hidden, it is put in a box. If the door must + be obstructed, it adds a ball in front of the door. + """ + + door, door_pos = super().add_door(i, j, door_idx, color, locked=locked) + + if blocked: + vec = DIR_TO_VEC[door_idx] + blocking_ball = Ball(self.blocking_ball_color) if blocked else None + self.grid.set(door_pos[0] - vec[0], door_pos[1] - vec[1], blocking_ball) + + if locked: + obj = Key(door.color) + if key_in_box: + box = Box(self.box_color) if key_in_box else None + box.contains = obj + obj = box + self.place_in_room(i, j, obj) + + return door, door_pos + + +class ObstructedMaze1Dlhb(ObstructedMazeEnv): + """ + A blue ball is hidden in a 2x1 maze. A locked door separates + rooms. 
Doors are obstructed by a ball and keys are hidden in boxes. + """ + + def __init__(self, key_in_box=True, blocked=True, seed=None): + self.key_in_box = key_in_box + self.blocked = blocked + + super().__init__( + num_rows=1, + num_cols=2, + num_rooms_visited=2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + self.add_door(0, 0, door_idx=0, color=self.door_colors[0], + locked=True, + key_in_box=self.key_in_box, + blocked=self.blocked) + + self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color) + self.place_agent(0, 0) + + +class ObstructedMaze1Dl(ObstructedMaze1Dlhb): + def __init__(self, seed=None): + super().__init__(False, False, seed) + + +class ObstructedMaze1Dlh(ObstructedMaze1Dlhb): + def __init__(self, seed=None): + super().__init__(True, False, seed) + + +class ObstructedMazeFull(ObstructedMazeEnv): + """ + A blue ball is hidden in one of the 4 corners of a 3x3 maze. Doors + are locked, doors are obstructed by a ball and keys are hidden in + boxes. + """ + + def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True, + num_quarters=4, num_rooms_visited=25, seed=None): + self.agent_room = agent_room + self.key_in_box = key_in_box + self.blocked = blocked + self.num_quarters = num_quarters + + super().__init__( + num_rows=3, + num_cols=3, + num_rooms_visited=num_rooms_visited, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + middle_room = (1, 1) + # Define positions of "side rooms" i.e. rooms that are neither + # corners nor the center. 
+ side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][:self.num_quarters] + for i in range(len(side_rooms)): + side_room = side_rooms[i] + + # Add a door between the center room and the side room + self.add_door(*middle_room, door_idx=i, color=self.door_colors[i], locked=False) + + for k in [-1, 1]: + # Add a door to each side of the side room + self.add_door(*side_room, locked=True, + door_idx=(i + k) % 4, + color=self.door_colors[(i + k) % len(self.door_colors)], + key_in_box=self.key_in_box, + blocked=self.blocked) + + corners = [(2, 0), (2, 2), (0, 2), (0, 0)][:self.num_quarters] + ball_room = self._rand_elem(corners) + + self.obj, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color) + self.place_agent(*self.agent_room) + + +class ObstructedMaze2Dl(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((2, 1), False, False, 1, 4, seed) + + +class ObstructedMaze2Dlh(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((2, 1), True, False, 1, 4, seed) + + +class ObstructedMaze2Dlhb(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((2, 1), True, True, 1, 4, seed) + + +class ObstructedMaze1Q(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((1, 1), True, True, 1, 5, seed) + + +class ObstructedMaze2Q(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((1, 1), True, True, 2, 11, seed) diff --git a/environments/minigrid/src/minigrid/envs/playground_v0.py b/environments/minigrid/src/minigrid/envs/playground_v0.py new file mode 100644 index 00000000..20e2da03 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/playground_v0.py @@ -0,0 +1,71 @@ +from minigrid.minigrid import * + + +class PlaygroundV0(MiniGridEnv): + """ + Environment with multiple rooms and random objects. + This environment has no specific goals or rewards. 
+ """ + + def __init__(self): + super().__init__(grid_size=19, max_steps=100) + + def _gen_grid(self, width, height): + # Create the grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.horz_wall(0, 0) + self.grid.horz_wall(0, height-1) + self.grid.vert_wall(0, 0) + self.grid.vert_wall(width-1, 0) + + roomW = width // 3 + roomH = height // 3 + + # For each row of rooms + for j in range(0, 3): + + # For each column + for i in range(0, 3): + xL = i * roomW + yT = j * roomH + xR = xL + roomW + yB = yT + roomH + + # Bottom wall and door + if i+1 < 3: + self.grid.vert_wall(xR, yT, roomH) + pos = (xR, self._rand_int(yT+1, yB-1)) + color = self._rand_elem(COLOR_NAMES) + self.grid.set(*pos, Door(color)) + + # Bottom wall and door + if j+1 < 3: + self.grid.horz_wall(xL, yB, roomW) + pos = (self._rand_int(xL+1, xR-1), yB) + color = self._rand_elem(COLOR_NAMES) + self.grid.set(*pos, Door(color)) + + # Randomize the player start position and orientation + self.place_agent() + + # Place random objects in the world + types = ['key', 'ball', 'box'] + for i in range(0, 12): + objType = self._rand_elem(types) + objColor = self._rand_elem(COLOR_NAMES) + if objType == 'key': + obj = Key(objColor) + elif objType == 'ball': + obj = Ball(objColor) + elif objType == 'box': + obj = Box(objColor) + self.place_obj(obj) + + # No explicit mission in this environment + self.mission = '' + + def step(self, action): + obs, reward, done, info = MiniGridEnv.step(self, action) + return obs, reward, done, info diff --git a/environments/minigrid/src/minigrid/envs/putnear.py b/environments/minigrid/src/minigrid/envs/putnear.py new file mode 100644 index 00000000..bcd96f62 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/putnear.py @@ -0,0 +1,117 @@ +from minigrid.minigrid import * + + +class PutNearEnv(MiniGridEnv): + """ + Environment in which the agent is instructed to place an object near + another object through a natural language string. 
+ """ + + def __init__( + self, + size=6, + numObjs=2 + ): + self.numObjs = numObjs + + super().__init__( + grid_size=size, + max_steps=5*size, + # Set this to True for maximum speed + see_through_walls=True + ) + + def _gen_grid(self, width, height): + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.horz_wall(0, 0) + self.grid.horz_wall(0, height-1) + self.grid.vert_wall(0, 0) + self.grid.vert_wall(width-1, 0) + + # Types and colors of objects we can generate + types = ['key', 'ball', 'box'] + + objs = [] + objPos = [] + + def near_obj(env, p1): + for p2 in objPos: + dx = p1[0] - p2[0] + dy = p1[1] - p2[1] + if abs(dx) <= 1 and abs(dy) <= 1: + return True + return False + + # Until we have generated all the objects + while len(objs) < self.numObjs: + objType = self._rand_elem(types) + objColor = self._rand_elem(COLOR_NAMES) + + # If this object already exists, try again + if (objType, objColor) in objs: + continue + + if objType == 'key': + obj = Key(objColor) + elif objType == 'ball': + obj = Ball(objColor) + elif objType == 'box': + obj = Box(objColor) + + pos = self.place_obj(obj, reject_fn=near_obj) + + objs.append((objType, objColor)) + objPos.append(pos) + + # Randomize the agent start position and orientation + self.place_agent() + + # Choose a random object to be moved + objIdx = self._rand_int(0, len(objs)) + self.move_type, self.moveColor = objs[objIdx] + self.move_pos = objPos[objIdx] + + # Choose a target object (to put the first object next to) + while True: + targetIdx = self._rand_int(0, len(objs)) + if targetIdx != objIdx: + break + self.target_type, self.target_color = objs[targetIdx] + self.target_pos = objPos[targetIdx] + + self.mission = 'put the %s %s near the %s %s' % ( + self.moveColor, + self.move_type, + self.target_color, + self.target_type + ) + + def step(self, action): + preCarrying = self.carrying + + obs, reward, done, info = super().step(action) + + u, v = self.dir_vec + ox, oy = (self.agent_pos[0] + 
u, self.agent_pos[1] + v) + tx, ty = self.target_pos + + # If we picked up the wrong object, terminate the episode + if action == self.actions.pickup and self.carrying: + if self.carrying.type != self.move_type or self.carrying.color != self.moveColor: + done = True + + # If successfully dropping an object near the target + if action == self.actions.drop and preCarrying: + if self.grid.get(ox, oy) is preCarrying: + if abs(ox - tx) <= 1 and abs(oy - ty) <= 1: + reward = self._reward() + done = True + + return obs, reward, done, info + + +class PutNear8x8N3(PutNearEnv): + def __init__(self): + super().__init__(size=8, numObjs=3) diff --git a/environments/minigrid/src/minigrid/envs/redbluedoors.py b/environments/minigrid/src/minigrid/envs/redbluedoors.py new file mode 100644 index 00000000..e847528a --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/redbluedoors.py @@ -0,0 +1,71 @@ +from minigrid.minigrid import * + + +class RedBlueDoorEnv(MiniGridEnv): + """ + Single room with red and blue doors on opposite sides. + The red door must be opened before the blue door to + obtain a reward. 
+ """ + + def __init__(self, size=8): + self.size = size + + super().__init__( + width=2*size, + height=size, + max_steps=20*size*size + ) + + def _gen_grid(self, width, height): + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the grid walls + self.grid.wall_rect(0, 0, 2*self.size, self.size) + self.grid.wall_rect(self.size//2, 0, self.size, self.size) + + # Place the agent in the top-left corner + self.place_agent(top=(self.size//2, 0), size=(self.size, self.size)) + + # Add a red door at a random position in the left wall + pos = self._rand_int(1, self.size - 1) + self.red_door = Door("red") + self.grid.set(self.size//2, pos, self.red_door) + + # Add a blue door at a random position in the right wall + pos = self._rand_int(1, self.size - 1) + self.blue_door = Door("blue") + self.grid.set(self.size//2 + self.size - 1, pos, self.blue_door) + + # Generate the mission string + self.mission = "open the red door then the blue door" + + def step(self, action): + red_door_opened_before = self.red_door.is_open + blue_door_opened_before = self.blue_door.is_open + + obs, reward, done, info = MiniGridEnv.step(self, action) + + red_door_opened_after = self.red_door.is_open + blue_door_opened_after = self.blue_door.is_open + + if blue_door_opened_after: + if red_door_opened_before: + reward = self._reward() + done = True + else: + reward = 0 + done = True + + elif red_door_opened_after: + if blue_door_opened_before: + reward = 0 + done = True + + return obs, reward, done, info + + +class RedBlueDoorEnv6x6(RedBlueDoorEnv): + def __init__(self): + super().__init__(size=6) diff --git a/environments/minigrid/src/minigrid/envs/unlock.py b/environments/minigrid/src/minigrid/envs/unlock.py new file mode 100644 index 00000000..f6b62d4e --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/unlock.py @@ -0,0 +1,40 @@ +from gym_minigrid.roomgrid import RoomGrid + + +class Unlock(RoomGrid): + """ + Unlock a door + """ + + def __init__(self, seed=None): + 
room_size = 6 + super().__init__( + num_rows=1, + num_cols=2, + room_size=room_size, + max_steps=8*room_size**2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Make sure the two rooms are directly connected by a locked door + door, _ = self.add_door(0, 0, 0, locked=True) + # Add a key to unlock the door + self.add_object(0, 0, 'key', door.color) + + self.place_agent(0, 0) + + self.door = door + self.mission = "open the door" + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.toggle: + if self.door.is_open: + reward = self._reward() + done = True + + return obs, reward, done, info diff --git a/environments/minigrid/src/minigrid/envs/unlockpickup.py b/environments/minigrid/src/minigrid/envs/unlockpickup.py new file mode 100644 index 00000000..8ca7a3bb --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/unlockpickup.py @@ -0,0 +1,42 @@ +from gym_minigrid.roomgrid import RoomGrid + + +class UnlockPickup(RoomGrid): + """ + Unlock a door, then pick up a box in another room + """ + + def __init__(self, seed=None): + room_size = 6 + super().__init__( + num_rows=1, + num_cols=2, + room_size=room_size, + max_steps=8*room_size**2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Add a box to the room on the right + obj, _ = self.add_object(1, 0, kind="box") + # Make sure the two rooms are directly connected by a locked door + door, _ = self.add_door(0, 0, 0, locked=True) + # Add a key to unlock the door + self.add_object(0, 0, 'key', door.color) + + self.place_agent(0, 0) + + self.obj = obj + self.mission = "pick up the %s %s" % (obj.color, obj.type) + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info diff --git 
a/environments/minigrid/src/minigrid/minigrid.py b/environments/minigrid/src/minigrid/minigrid.py new file mode 100644 index 00000000..ec0e6e71 --- /dev/null +++ b/environments/minigrid/src/minigrid/minigrid.py @@ -0,0 +1,1329 @@ +import math +import hashlib +from enum import IntEnum + +import numpy as np + +import gym +from gym import spaces +from gym.utils import seeding + +from rendering import * +import simenv as sm + +# Size in pixels of a tile in the full-scale human view + + +TILE_PIXELS = 32 + +# Map of color names to RGB values +COLORS = { + 'red': np.array([255, 0, 0]), + 'green': np.array([0, 255, 0]), + 'blue': np.array([0, 0, 255]), + 'purple': np.array([112, 39, 195]), + 'yellow': np.array([255, 255, 0]), + 'grey': np.array([100, 100, 100]) +} + +COLOR_NAMES = sorted(list(COLORS.keys())) + +# Used to map colors to integers +COLOR_TO_IDX = { + 'red': 0, + 'green': 1, + 'blue': 2, + 'purple': 3, + 'yellow': 4, + 'grey': 5 +} + +IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys())) + +# Map of object type to integers +OBJECT_TO_IDX = { + 'unseen': 0, + 'empty': 1, + 'wall': 2, + 'floor': 3, + 'door': 4, + 'key': 5, + 'ball': 6, + 'box': 7, + 'goal': 8, + 'lava': 9, + 'agent': 10, +} + +IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys())) + +# Map of state names to integers +STATE_TO_IDX = { + 'open': 0, + 'closed': 1, + 'locked': 2, +} + +# Map of agent direction indices to vectors +DIR_TO_VEC = [ + # Pointing right (positive X) + np.array((1, 0)), + # Down (positive Y) + np.array((0, 1)), + # Pointing left (negative X) + np.array((-1, 0)), + # Up (negative Y) + np.array((0, -1)), +] + + +class WorldObj: + """ + Base class for grid world objects + """ + + def __init__(self, type, color): + assert type in OBJECT_TO_IDX, type + assert color in COLOR_TO_IDX, color + self.type = type + self.color = color + self.contains = None + + # Initial position of the object + self.init_pos = None + + # Current position of the object + 
self.cur_pos = None + + def can_overlap(self): + """Can the agent overlap with this?""" + return False + + def can_pickup(self): + """Can the agent pick this up?""" + return False + + def can_contain(self): + """Can this contain another object?""" + return False + + def see_behind(self): + """Can the agent see behind this object?""" + return True + + def toggle(self, env, pos): + """Method to trigger/toggle an action this object performs""" + return False + + def encode(self): + """Encode the a description of this object as a 3-tuple of integers""" + return OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], 0 + + @staticmethod + def decode(type_idx, color_idx, state): + """Create an object from a 3-tuple state description""" + + obj_type = IDX_TO_OBJECT[type_idx] + color = IDX_TO_COLOR[color_idx] + + if obj_type == 'empty' or obj_type == 'unseen': + return None + + # State, 0: open, 1: closed, 2: locked + is_open = state == 0 + is_locked = state == 2 + + if obj_type == 'wall': + v = Wall(color) + elif obj_type == 'floor': + v = Floor(color) + elif obj_type == 'ball': + v = Ball(color) + elif obj_type == 'key': + v = Key(color) + elif obj_type == 'box': + v = Box(color) + elif obj_type == 'door': + v = Door(color, is_open, is_locked) + elif obj_type == 'goal': + v = Goal() + elif obj_type == 'lava': + v = Lava() + else: + assert False, "unknown object type in decode '%s'" % obj_type + + return v + + def render(self, r): + """Draw this object with the given renderer""" + raise NotImplementedError + + +class Goal(WorldObj): + def __init__(self): + super().__init__('goal', 'green') + + def can_overlap(self): + return True + + def render(self, img): + fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color]) + sm.Rectangle([], color=COLORS[self.color]).draw(img) + + +class Floor(WorldObj): + """ + Colored floor tile the agent can walk over + """ + + def __init__(self, color='blue'): + super().__init__('floor', color) + + def can_overlap(self): + return True + + 
def render(self, img): + # Give the floor a pale color + color = COLORS[self.color] / 2 + fill_coords(img, point_in_rect(0.031, 1, 0.031, 1), color) + + +class Lava(WorldObj): + def __init__(self): + super().__init__('lava', 'red') + + def can_overlap(self): + return True + + def render(self, img): + c = (255, 128, 0) + + # Background color + fill_coords(img, point_in_rect(0, 1, 0, 1), c) + + # Little waves + for i in range(3): + ylo = 0.3 + 0.2 * i + yhi = 0.4 + 0.2 * i + fill_coords(img, point_in_line(0.1, ylo, 0.3, yhi, r=0.03), (0, 0, 0)) + fill_coords(img, point_in_line(0.3, yhi, 0.5, ylo, r=0.03), (0, 0, 0)) + fill_coords(img, point_in_line(0.5, ylo, 0.7, yhi, r=0.03), (0, 0, 0)) + fill_coords(img, point_in_line(0.7, yhi, 0.9, ylo, r=0.03), (0, 0, 0)) + + +class Wall(WorldObj): + def __init__(self, color='grey'): + super().__init__('wall', color) + + def see_behind(self): + return False + + def render(self, img): + fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color]) + + +class Door(WorldObj): + def __init__(self, color, is_open=False, is_locked=False): + super().__init__('door', color) + self.is_open = is_open + self.is_locked = is_locked + + def can_overlap(self): + """The agent can only walk over this cell when the door is open""" + return self.is_open + + def see_behind(self): + return self.is_open + + def toggle(self, env, pos): + # If the player has the right key to open the door + if self.is_locked: + if isinstance(env.carrying, Key) and env.carrying.color == self.color: + self.is_locked = False + self.is_open = True + return True + return False + + self.is_open = not self.is_open + return True + + def encode(self): + """Encode the a description of this object as a 3-tuple of integers""" + + # State, 0: open, 1: closed, 2: locked + state = None + if self.is_open: + state = 0 + elif self.is_locked: + state = 2 + elif not self.is_open: + state = 1 + + return OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state + + def render(self, img): + 
c = COLORS[self.color] + + if self.is_open: + fill_coords(img, point_in_rect(0.88, 1.00, 0.00, 1.00), c) + fill_coords(img, point_in_rect(0.92, 0.96, 0.04, 0.96), (0, 0, 0)) + return + + # Door frame and door + if self.is_locked: + fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c) + fill_coords(img, point_in_rect(0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c)) + + # Draw key slot + fill_coords(img, point_in_rect(0.52, 0.75, 0.50, 0.56), c) + else: + fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c) + fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), (0, 0, 0)) + fill_coords(img, point_in_rect(0.08, 0.92, 0.08, 0.92), c) + fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), (0, 0, 0)) + + # Draw door handle + fill_coords(img, point_in_circle(cx=0.75, cy=0.50, r=0.08), c) + + +class Key(WorldObj): + def __init__(self, color='blue'): + super(Key, self).__init__('key', color) + + def can_pickup(self): + return True + + def render(self, img): + c = COLORS[self.color] + + # Vertical quad + fill_coords(img, point_in_rect(0.50, 0.63, 0.31, 0.88), c) + + # Teeth + fill_coords(img, point_in_rect(0.38, 0.50, 0.59, 0.66), c) + fill_coords(img, point_in_rect(0.38, 0.50, 0.81, 0.88), c) + + # Ring + fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.190), c) + fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.064), (0, 0, 0)) + + +class Ball(WorldObj): + def __init__(self, color='blue'): + super(Ball, self).__init__('ball', color) + + def can_pickup(self): + return True + + def render(self, img): + fill_coords(img, point_in_circle(0.5, 0.5, 0.31), COLORS[self.color]) + + +class Box(WorldObj): + def __init__(self, color, contains=None): + super(Box, self).__init__('box', color) + self.contains = contains + + def can_pickup(self): + return True + + def render(self, img): + c = COLORS[self.color] + + # Outline + fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), c) + fill_coords(img, point_in_rect(0.18, 0.82, 0.18, 0.82), (0, 0, 0)) + + # 
Horizontal slit + fill_coords(img, point_in_rect(0.16, 0.84, 0.47, 0.53), c) + + def toggle(self, env, pos): + # Replace the box by its contents + env.grid.set(*pos, self.contains) + return True + + +class Grid: + """ + Represent a grid and operations on it + """ + + # Static cache of pre-renderer tiles + tile_cache = {} + + def __init__(self, width, height): + assert width >= 3 + assert height >= 3 + + self.width = width + self.height = height + + self.grid = [None] * width * height + + def __contains__(self, key): + if isinstance(key, WorldObj): + for e in self.grid: + if e is key: + return True + elif isinstance(key, tuple): + for e in self.grid: + if e is None: + continue + if (e.color, e.type) == key: + return True + if key[0] is None and key[1] == e.type: + return True + return False + + def __eq__(self, other): + grid1 = self.encode() + grid2 = other.encode() + return np.array_equal(grid2, grid1) + + def __ne__(self, other): + return not self == other + + def copy(self): + from copy import deepcopy + return deepcopy(self) + + def set(self, i, j, v): + assert 0 <= i < self.width + assert 0 <= j < self.height + self.grid[j * self.width + i] = v + + def get(self, i, j): + assert 0 <= i < self.width + assert 0 <= j < self.height + return self.grid[j * self.width + i] + + def horz_wall(self, x, y, length=None, obj_type=Wall): + if length is None: + length = self.width - x + for i in range(0, length): + self.set(x + i, y, obj_type()) + + def vert_wall(self, x, y, length=None, obj_type=Wall): + if length is None: + length = self.height - y + for j in range(0, length): + self.set(x, y + j, obj_type()) + + def wall_rect(self, x, y, w, h): + self.horz_wall(x, y, w) + self.horz_wall(x, y + h - 1, w) + self.vert_wall(x, y, h) + self.vert_wall(x + w - 1, y, h) + + def rotate_left(self): + """ + Rotate the grid to the left (counter-clockwise) + """ + + grid = Grid(self.height, self.width) + + for i in range(self.width): + for j in range(self.height): + v = self.get(i, j) 
+ grid.set(j, grid.height - 1 - i, v) + + return grid + + def slice(self, topX, topY, width, height): + """ + Get a subset of the grid + """ + + grid = Grid(width, height) + + for j in range(0, height): + for i in range(0, width): + x = topX + i + y = topY + j + + if 0 <= x < self.width and \ + 0 <= y < self.height: + v = self.get(x, y) + else: + v = Wall() + + grid.set(i, j, v) + + return grid + + @classmethod + def render_tile( + cls, + obj, + agent_dir=None, + highlight=False, + tile_size=TILE_PIXELS, + subdivs=3 + ): + """ + Render a tile and cache the result + """ + + # Hash map lookup key for the cache + key = (agent_dir, highlight, tile_size) + key = obj.encode() + key if obj else key + + if key in cls.tile_cache: + return cls.tile_cache[key] + + img = np.zeros(shape=(tile_size * subdivs, tile_size * subdivs, 3), dtype=np.uint8) + + # Draw the grid lines (top and left edges) + fill_coords(img, point_in_rect(0, 0.031, 0, 1), (100, 100, 100)) + fill_coords(img, point_in_rect(0, 1, 0, 0.031), (100, 100, 100)) + + if obj is not None: + obj.render(img) + + # Overlay the agent on top + if agent_dir is not None: + tri_fn = point_in_triangle( + (0.12, 0.19), + (0.87, 0.50), + (0.12, 0.81), + ) + + # Rotate the agent based on its direction + tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5, theta=0.5 * math.pi * agent_dir) + fill_coords(img, tri_fn, (255, 0, 0)) + + # Highlight the cell if needed + if highlight: + highlight_img(img) + + # Downsample the image to perform supersampling/anti-aliasing + img = downsample(img, subdivs) + + # Cache the rendered tile + cls.tile_cache[key] = img + + return img + + def render( + self, + tile_size, + agent_pos=None, + agent_dir=None, + highlight_mask=None + ): + """ + Render this grid at a given scale + :param tile_size: tile size in pixels + :param agent_pos: position of the agent + :param agent_dir: direction of the agent + :param highlight_mask: tiles to highlight + """ + + if highlight_mask is None: + highlight_mask = 
np.zeros(shape=(self.width, self.height), dtype=bool) + + # Compute the total grid size + width_px = self.width * tile_size + height_px = self.height * tile_size + + img = np.zeros(shape=(height_px, width_px, 3), dtype=np.uint8) + + # Render the grid + for j in range(0, self.height): + for i in range(0, self.width): + cell = self.get(i, j) + + agent_here = np.array_equal(agent_pos, (i, j)) + tile_img = Grid.render_tile( + cell, + agent_dir=agent_dir if agent_here else None, + highlight=highlight_mask[i, j], + tile_size=tile_size + ) + + ymin = j * tile_size + ymax = (j + 1) * tile_size + xmin = i * tile_size + xmax = (i + 1) * tile_size + img[ymin:ymax, xmin:xmax, :] = tile_img + + return img + + def encode(self, vis_mask=None): + """ + Produce a compact numpy encoding of the grid + """ + + if vis_mask is None: + vis_mask = np.ones((self.width, self.height), dtype=bool) + + array = np.zeros((self.width, self.height, 3), dtype='uint8') + + for i in range(self.width): + for j in range(self.height): + if vis_mask[i, j]: + v = self.get(i, j) + + if v is None: + array[i, j, 0] = OBJECT_TO_IDX['empty'] + array[i, j, 1] = 0 + array[i, j, 2] = 0 + + else: + array[i, j, :] = v.encode() + + return array + + @staticmethod + def decode(array): + """ + Decode an array grid encoding back into a grid + """ + + width, height, channels = array.shape + assert channels == 3 + + vis_mask = np.ones(shape=(width, height), dtype=bool) + + grid = Grid(width, height) + for i in range(width): + for j in range(height): + type_idx, color_idx, state = array[i, j] + v = WorldObj.decode(type_idx, color_idx, state) + grid.set(i, j, v) + vis_mask[i, j] = (type_idx != OBJECT_TO_IDX['unseen']) + + return grid, vis_mask + + @staticmethod + def process_vis(grid, agent_pos): + mask = np.zeros(shape=(grid.width, grid.height), dtype=bool) + + mask[agent_pos[0], agent_pos[1]] = True + + for j in reversed(range(0, grid.height)): + for i in range(0, grid.width - 1): + if not mask[i, j]: + continue + + cell 
= grid.get(i, j) + if cell and not cell.see_behind(): + continue + + mask[i + 1, j] = True + if j > 0: + mask[i + 1, j - 1] = True + mask[i, j - 1] = True + + for i in reversed(range(1, grid.width)): + if not mask[i, j]: + continue + + cell = grid.get(i, j) + if cell and not cell.see_behind(): + continue + + mask[i - 1, j] = True + if j > 0: + mask[i - 1, j - 1] = True + mask[i, j - 1] = True + + for j in range(0, grid.height): + for i in range(0, grid.width): + if not mask[i, j]: + grid.set(i, j, None) + + return mask + + +class MiniGridEnv(gym.Env): + """ + 2D grid world game environment + """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 10 + } + + # Enumeration of possible actions + class Actions(IntEnum): + # Turn left, turn right, move forward + left = 0 + right = 1 + forward = 2 + + # Pick up an object + pickup = 3 + + # Drop an object + drop = 4 + + # Toggle/activate an object + toggle = 5 + + # Done completing task + done = 6 + + def __init__( + self, + grid_size=None, + width=None, + height=None, + max_steps=100, + see_through_walls=False, + seed=1337, + agent_view_size=7 + ): + # Can't set both grid_size and width/height + if grid_size: + assert width is None and height is None + width = grid_size + height = grid_size + + # Action enumeration for this environment + self.actions = MiniGridEnv.Actions + + # Actions are discrete integer values + self.action_space = spaces.Discrete(len(self.actions)) + + # Number of cells (width and height) in the agent view + assert agent_view_size % 2 == 1 + assert agent_view_size >= 3 + self.agent_view_size = agent_view_size + + # Observations are dictionaries containing an + # encoding of the grid and a textual 'mission' string + self.observation_space = spaces.Box( + low=0, + high=255, + shape=(self.agent_view_size, self.agent_view_size, 3), + dtype='uint8' + ) + self.observation_space = spaces.Dict({ + 'image': self.observation_space + }) + + # Range of possible rewards + 
def reset(self):
    """
    Start a new episode and return its first observation.

    A fresh grid is generated through ``_gen_grid`` on every call. To replay
    the same grid, call ``seed()`` with the same value before ``reset()``.
    """
    # Cleared first so we can verify that _gen_grid really placed the agent
    self.agent_pos = self.agent_dir = None

    self._gen_grid(self.width, self.height)

    # _gen_grid is responsible for defining both of these
    assert self.agent_pos is not None
    assert self.agent_dir is not None

    # The agent may only start on an empty or overlappable cell
    cell_under_agent = self.grid.get(*self.agent_pos)
    assert cell_under_agent is None or cell_under_agent.can_overlap()

    # Nothing is carried and no step has been taken yet
    self.carrying = None
    self.step_count = 0

    return self.gen_obs()
def __str__(self):
    """
    Produce a pretty string of the environment's grid along with the agent.

    Every grid cell is rendered as a 2-character string: the first character
    identifies the object and the second one its color. The agent is drawn
    as its direction arrow repeated twice, empty cells as two spaces, open
    doors as ``__`` and locked doors as ``L`` plus the color initial. Rows
    are separated by newlines, with no trailing newline.
    """

    # Map of object types to short string
    object_to_str = {
        'wall': 'W',
        'floor': 'F',
        'door': 'D',
        'key': 'K',
        'ball': 'A',
        'box': 'B',
        'goal': 'G',
        'lava': 'V',
    }

    # Map agent's direction to short string
    agent_dir_to_str = {
        0: '>',
        1: 'V',
        2: '<',
        3: '^'
    }

    # Collect the pieces and join once, instead of growing a string named
    # `str` (which shadowed the builtin) with repeated `+=`.
    rows = []

    for j in range(self.grid.height):
        cells = []

        for i in range(self.grid.width):
            if i == self.agent_pos[0] and j == self.agent_pos[1]:
                cells.append(2 * agent_dir_to_str[self.agent_dir])
                continue

            c = self.grid.get(i, j)

            if c is None:
                # Two spaces keep the 2-characters-per-cell alignment
                cells.append('  ')
            elif c.type == 'door':
                if c.is_open:
                    cells.append('__')
                elif c.is_locked:
                    cells.append('L' + c.color[0].upper())
                else:
                    cells.append('D' + c.color[0].upper())
            else:
                cells.append(object_to_str[c.type] + c.color[0].upper())

        rows.append(''.join(cells))

    return '\n'.join(rows)
def place_obj(self,
              obj,
              top=None,
              size=None,
              reject_fn=None,
              max_tries=math.inf
              ):
    """
    Place an object at an empty position in the grid

    Candidate positions are drawn by rejection sampling inside the rectangle
    described by ``top`` and ``size`` (clipped to the grid). A candidate is
    rejected when the cell is occupied, when it is the agent's position, or
    when ``reject_fn`` returns a truthy value for it.

    :param obj: Object to place (may be None to only pick a position)
    :param top: top-left position of the rectangle where to place;
        defaults to (0, 0), negative coordinates are clamped to 0
    :param size: size of the rectangle where to place; defaults to the
        whole grid
    :param reject_fn: function ``(env, pos) -> bool`` to filter out
        potential positions
    :param max_tries: maximum number of candidate positions to draw
    :return: the chosen position as a numpy array ``[x, y]``
    :raises RecursionError: if no valid position is found within
        ``max_tries`` draws
    """

    if top is None:
        top = (0, 0)
    else:
        top = (max(top[0], 0), max(top[1], 0))

    if size is None:
        size = (self.grid.width, self.grid.height)

    # Exclusive upper bounds of the sampling rectangle, clipped to the grid
    x_high = min(top[0] + size[0], self.grid.width)
    y_high = min(top[1] + size[1], self.grid.height)

    num_tries = 0

    while True:
        # This handles the rare cases where rejection sampling would get
        # stuck in an infinite loop. Using `>=` makes sure that at most
        # `max_tries` candidates are drawn (not `max_tries + 1`).
        if num_tries >= max_tries:
            raise RecursionError('rejection sampling failed in place_obj')

        num_tries += 1

        pos = np.array((
            self._rand_int(top[0], x_high),
            self._rand_int(top[1], y_high)
        ))

        # Don't place the object on top of another object
        if self.grid.get(*pos) is not None:
            continue

        # Don't place the object where the agent is
        if np.array_equal(pos, self.agent_pos):
            continue

        # Check if there is a filtering criterion
        if reject_fn and reject_fn(self, pos):
            continue

        break

    self.grid.set(*pos, obj)

    if obj is not None:
        obj.init_pos = pos
        obj.cur_pos = pos

    return pos
def agent_sees(self, x, y):
    """
    Check if a non-empty grid position is visible to the agent

    :param x: absolute grid x coordinate
    :param y: absolute grid y coordinate
    :return: True when (x, y) lies in the agent's view, holds an object,
        and the agent's observation shows an object of the same type at
        the corresponding view coordinates; False otherwise
    """

    coordinates = self.relative_coords(x, y)
    if coordinates is None:
        return False
    vx, vy = coordinates

    # An empty world cell can never count as a seen object. Checking this
    # first also avoids dereferencing None below: the observation may show
    # the carried object at the agent's own (empty) cell.
    world_cell = self.grid.get(x, y)
    if world_cell is None:
        return False

    obs = self.gen_obs()
    obs_grid, _ = Grid.decode(obs['image'])
    obs_cell = obs_grid.get(vx, vy)

    return obs_cell is not None and obs_cell.type == world_cell.type
def gen_obs(self):
    """
    Build the agent's observation (partially observable, low-resolution
    encoding of what it currently sees).

    The returned dictionary contains:
    - 'image': the encoded partially observable view of the environment
    - 'direction': the agent's direction/orientation (acting as a compass)
    - 'mission': the textual mission string (instructions for the agent)
    """

    view_grid, visibility = self.gen_obs_grid()

    # Turn the visible part of the view into a numpy array
    encoded_view = view_grid.encode(visibility)

    assert hasattr(self, 'mission'), "environments must define a textual mission string"

    return {
        'image': encoded_view,
        'direction': self.agent_dir,
        'mission': self.mission
    }
def downsample(img, factor):
    """
    Downsample an image along both dimensions by some factor

    Every output pixel is the mean of a ``factor`` x ``factor`` block of
    input pixels. Any trailing dimensions (e.g. the channel axis) are kept
    as they are, so grayscale ``(H, W)``, RGB ``(H, W, 3)`` and RGBA
    ``(H, W, 4)`` images are all supported.

    :param img: numpy array whose first two dimensions are height and width,
        both divisible by ``factor``
    :param factor: positive integer downsampling factor
    :return: floating point array of shape
        ``(H // factor, W // factor) + img.shape[2:]``
    """

    assert img.shape[0] % factor == 0
    assert img.shape[1] % factor == 0

    out_h = img.shape[0] // factor
    out_w = img.shape[1] // factor

    # Split each spatial axis into (block index, offset inside the block)
    # without assuming a fixed number of channels.
    blocks = img.reshape((out_h, factor, out_w, factor) + tuple(img.shape[2:]))

    # Average over the within-block offsets: columns first, then rows
    blocks = blocks.mean(axis=3)
    blocks = blocks.mean(axis=1)

    return blocks
class Room:
    """
    Rectangular room of a RoomGrid, described by its top-left corner and
    its size (both include the surrounding walls).
    """

    def __init__(
        self,
        top,
        size
    ):
        # Top-left corner and size (tuples)
        self.top = top
        self.size = size

        # List of door objects and door positions
        # Order of the doors is right, down, left, up
        self.doors = [None] * 4
        self.door_pos = [None] * 4

        # List of rooms adjacent to this one
        # Order of the neighbors is right, down, left, up
        self.neighbors = [None] * 4

        # Indicates if this room is behind a locked door
        self.locked = False

        # List of objects contained
        self.objs = []

    def rand_pos(self, env):
        """
        Sample a random position strictly inside the room (walls excluded)

        :param env: environment providing ``_rand_pos(xLow, xHigh, yLow, yHigh)``
        :return: whatever ``env._rand_pos`` returns, an (x, y) position
        """

        top_x, top_y = self.top
        size_x, size_y = self.size

        # The environment method is named `_rand_pos`; the former call to
        # `env._randPos` raised AttributeError on every use.
        return env._rand_pos(
            top_x + 1, top_x + size_x - 1,
            top_y + 1, top_y + size_y - 1
        )

    def pos_inside(self, x, y):
        """
        Check if a position is within the bounds of this room
        (walls included)
        """

        top_x, top_y = self.top
        size_x, size_y = self.size

        if x < top_x or y < top_y:
            return False

        if x >= top_x + size_x or y >= top_y + size_y:
            return False

        return True
def get_room(self, i, j):
    """
    Get the room at column i, row j of the room grid

    :param i: room column index, 0 <= i < num_cols
    :param j: room row index, 0 <= j < num_rows
    :return: the Room stored at ``room_grid[j][i]``
    """

    # Reject negative indices too (as room_from_pos does for positions):
    # they would otherwise silently wrap around to rooms on the far side.
    assert 0 <= i < self.num_cols
    assert 0 <= j < self.num_rows
    return self.room_grid[j][i]
def add_object(self, i, j, kind=None, color=None):
    """
    Create a new object and place it in room (i, j)

    :param i: room column index
    :param j: room row index
    :param kind: 'key', 'ball' or 'box'; drawn at random when None
    :param color: color name; drawn at random when None
    :return: the ``(obj, pos)`` pair returned by ``place_in_room``
    """

    supported_kinds = ['key', 'ball', 'box']

    # The kind is drawn before the color so the RNG is consumed in a
    # fixed order.
    kind = self._rand_elem(supported_kinds) if kind is None else kind
    color = self._rand_color() if color is None else color

    # TODO: we probably want to add an Object.make helper function
    assert kind in supported_kinds

    constructors = {'key': Key, 'ball': Ball, 'box': Box}
    new_obj = constructors[kind](color)

    return self.place_in_room(i, j, new_obj)
def remove_wall(self, i, j, wall_idx):
    """
    Remove a wall between two rooms

    Clears every wall cell on the chosen side of room (i, j), corners
    excluded, and marks both rooms as connected through that side.

    :param i: room column index
    :param j: room row index
    :param wall_idx: side to open; ordering is right, down, left, up
    """

    room = self.get_room(i, j)

    assert 0 <= wall_idx < 4
    assert room.doors[wall_idx] is None, "door exists on this wall"
    assert room.neighbors[wall_idx], "invalid wall"

    neighbor = room.neighbors[wall_idx]

    left, top = room.top
    width, height = room.size

    # Ordering of walls is right, down, left, up
    if wall_idx == 0:
        wall_cells = [(left + width - 1, top + k) for k in range(1, height - 1)]
    elif wall_idx == 1:
        wall_cells = [(left + k, top + height - 1) for k in range(1, width - 1)]
    elif wall_idx == 2:
        wall_cells = [(left, top + k) for k in range(1, height - 1)]
    elif wall_idx == 3:
        wall_cells = [(left + k, top) for k in range(1, width - 1)]
    else:
        assert False, "invalid wall index"

    for x, y in wall_cells:
        self.grid.set(x, y, None)

    # Mark the rooms as connected
    room.doors[wall_idx] = True
    neighbor.doors[(wall_idx + 2) % 4] = True
def add_distractors(self, i=None, j=None, num_distractors=10, all_unique=True):
    """
    Add random objects that can potentially distract/confuse the agent.

    :param i: room column index; a random column is drawn for every
        distractor when None
    :param j: room row index; a random row is drawn for every distractor
        when None
    :param num_distractors: number of objects to add
    :param all_unique: when True, never add a (type, color) pair that is
        already present in any room or was already added by this call
    :return: list of the objects that were added
    :raises RecursionError: if ``all_unique`` is set and every possible
        (type, color) pair is already used, so no further unique object
        can be created
    """

    kinds = ['key', 'ball', 'box']

    # Collect the (type, color) pairs of the existing objects
    used = {
        (obj.type, obj.color)
        for row in self.room_grid
        for room in row
        for obj in room.objs
    }

    # Every pair this method is able to generate
    candidates = {(kind, color) for kind in kinds for color in COLOR_NAMES}

    # List of distractors added
    dists = []

    while len(dists) < num_distractors:
        # Without this check the rejection loop below never terminates
        # once all unique pairs are taken.
        if all_unique and candidates <= used:
            raise RecursionError('add_distractors failed')

        # Color is drawn before the type, keeping the RNG order stable
        color = self._rand_elem(COLOR_NAMES)
        kind = self._rand_elem(kinds)
        obj = (kind, color)

        if all_unique and obj in used:
            continue

        # Add the object to a random room if no room specified
        room_i = i
        room_j = j
        if room_i is None:
            room_i = self._rand_int(0, self.num_cols)
        if room_j is None:
            room_j = self._rand_int(0, self.num_rows)

        dist, _ = self.add_object(room_i, room_j, *obj)

        used.add(obj)
        dists.append(dist)

    return dists
b/integrations/Blender/simenv_blender/__init__.py @@ -11,27 +11,29 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -bl_info = { - "name" : "simenv", - "author" : "Hugging Face", - "description" : "", - "blender" : (3, 2, 0), - "version" : (0, 0, 1), - "location" : "View3D", - "warning" : "", - "category" : "Simulation" -} - import bpy from .simenv_op import SIMENV_OT_ImportScene from .simenv_pnl import SIMENV_PT_Panel +bl_info = { + "name": "simenv", + "author": "Hugging Face", + "description": "", + "blender": (3, 2, 0), + "version": (0, 0, 1), + "location": "View3D", + "warning": "", + "category": "Simulation" +} + classes = (SIMENV_OT_ImportScene, SIMENV_PT_Panel) + def register(): for c in classes: bpy.utils.register_class(c) + def unregister(): for c in classes: bpy.utils.unregister_class(c) diff --git a/integrations/Blender/simenv_blender/client.py b/integrations/Blender/simenv_blender/client.py index 283b3a92..63ed9684 100644 --- a/integrations/Blender/simenv_blender/client.py +++ b/integrations/Blender/simenv_blender/client.py @@ -1,6 +1,4 @@ import socket -import json -import base64 class Client: diff --git a/integrations/Blender/simenv_blender/simenv_op.py b/integrations/Blender/simenv_blender/simenv_op.py index 746190bb..8ce88c59 100644 --- a/integrations/Blender/simenv_blender/simenv_op.py +++ b/integrations/Blender/simenv_blender/simenv_op.py @@ -1,4 +1,3 @@ -import bpy from bpy.types import Operator from .simulator import Simulator diff --git a/integrations/Blender/simenv_blender/simenv_pnl.py b/integrations/Blender/simenv_blender/simenv_pnl.py index f11df9a4..2361776d 100644 --- a/integrations/Blender/simenv_blender/simenv_pnl.py +++ b/integrations/Blender/simenv_blender/simenv_pnl.py @@ -1,6 +1,7 @@ import bpy from bpy.types import Panel + class SIMENV_PT_Panel(Panel): bl_space_type = "VIEW_3D" bl_region_type = "UI" diff --git a/integrations/Blender/simenv_blender/simulator.py 
b/integrations/Blender/simenv_blender/simulator.py index 55e06d23..3529872a 100644 --- a/integrations/Blender/simenv_blender/simulator.py +++ b/integrations/Blender/simenv_blender/simulator.py @@ -1,8 +1,6 @@ -from email import message import json import os import base64 -from pickle import BINPUT import bpy from .client import Client from pathlib import Path diff --git a/integrations/Godot/simenv-godot/Scenes/scene.tscn b/integrations/Godot/simenv-godot/Scenes/scene.tscn index 1bd497e3..705f7037 100644 --- a/integrations/Godot/simenv-godot/Scenes/scene.tscn +++ b/integrations/Godot/simenv-godot/Scenes/scene.tscn @@ -1,7 +1,7 @@ -[gd_scene load_steps=7 format=3 uid="uid://bpe4rf720wc7w"] +[gd_scene load_steps=7 format=3 uid="uid://bmr58mk5sqro3"] -[ext_resource type="Script" path="res://SimEnv/Simulator.gd" id="1_oo243"] -[ext_resource type="Script" path="res://SimEnv/Camera.gd" id="2_wa3b4"] +[ext_resource type="Script" path="res://SimEnv/simulator.gd" id="1_u0wd1"] +[ext_resource type="Script" path="res://SimEnv/camera.gd" id="2_3niri"] [sub_resource type="PhysicalSkyMaterial" id="PhysicalSkyMaterial_tieyq"] ground_color = Color(0.247059, 0.172549, 0.0823529, 1) @@ -21,7 +21,7 @@ glow_enabled = true [sub_resource type="CameraEffects" id="CameraEffects_noyl2"] [node name="Root" type="Node"] -script = ExtResource("1_oo243") +script = ExtResource("1_u0wd1") [node name="BaseWorld" type="Node3D" parent="."] @@ -30,7 +30,7 @@ transform = Transform3D(-0.597625, 0.452977, -0.661556, -1.49012e-08, 0.825113, environment = SubResource("Environment_ss83i") effects = SubResource("CameraEffects_noyl2") current = true -script = ExtResource("2_wa3b4") +script = ExtResource("2_3niri") [node name="DirectionalLight3D" type="DirectionalLight3D" parent="BaseWorld"] transform = Transform3D(-0.812844, -0.527265, 0.247543, -0.0232084, 0.453958, 0.890721, -0.58202, 0.718271, -0.381234, 0, 0, 0) diff --git a/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd 
b/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd index 973f25fd..ed1e8013 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd @@ -1,5 +1,15 @@ -extends Node class_name Client +extends Node +# This class sets up the TCP client to receive data +# from the Python SimEnv API through the TCP server +# +# Reading the stream is synchronized on the _physics_process +# Physics will only step if the command received tells it to do so +# (see SimEnv/Commands/step.gd) +# +# Data is received by chunks of _chunk_size +# _warmed_up is a hacky bugfix to start the TCP stream before the physics sync + signal connected signal data @@ -11,17 +21,21 @@ var _stream: StreamPeerTCP = StreamPeerTCP.new() var _chunk_size: int = 1024 var _warmed_up: bool = false + func _ready() -> void: _status = _stream.get_status() + func _physics_process(_delta): # this is called at a fixed rate update_status() if _status == _stream.STATUS_CONNECTED: + # to sync commands with the physics steps get_tree().paused = true read() + func update_status(): _stream.poll() var new_status: int = _stream.get_status() @@ -40,6 +54,7 @@ func update_status(): print("Error with socket stream.") emit_signal("error") + func read(): update_status() var available_bytes: int = _stream.get_available_bytes() @@ -66,6 +81,7 @@ func read(): else: get_tree().paused = false + func connect_to_host(host: String, port: int) -> void: print("Connecting to %s:%d" % [host, port]) if _status == _stream.STATUS_CONNECTED: @@ -76,6 +92,7 @@ func connect_to_host(host: String, port: int) -> void: _stream.disconnect_from_host() emit_signal("error") + func send(out_data: PackedByteArray) -> bool: if _status != _stream.STATUS_CONNECTED: print("Error: Stream is not currently connected.") diff --git a/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd b/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd index 87bede67..bc2b4317 100644 --- 
a/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd @@ -1,11 +1,13 @@ -extends Node class_name Command +extends Node +# signal callback var content : Variant var _commands : Dictionary + func load_commands(): var directory: Directory = Directory.new() var com_path : String = "res://SimEnv/Commands" @@ -24,11 +26,13 @@ func load_commands(): directory.list_dir_end() + func execute(type: String) -> void: if type in _commands: _commands[type].execute(content) else: print("Unknown command.") - + + func _handle_callback(callback_data: PackedByteArray): emit_signal("callback", callback_data) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd deleted file mode 100644 index e3b10e71..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd +++ /dev/null @@ -1,15 +0,0 @@ -extends Node - -signal callback - -func execute(content) -> void: - var content_bytes : PackedByteArray = Marshalls.base64_to_raw(content["b64bytes"]) - - var gltf_state : GLTFState = GLTFState.new() - var gltf_doc : GLTFDocument = GLTFDocument.new() - - gltf_doc.append_from_buffer(content_bytes, "", gltf_state) - var gltf_scene = gltf_doc.generate_scene(gltf_state) - get_tree().current_scene.add_child(gltf_scene) - - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd index 707cbdf8..b0b883d7 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd @@ -1,7 +1,9 @@ extends Node +# Close the application signal callback + func execute(_content): get_tree().quit() get_tree().paused = false diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd deleted 
file mode 100644 index 66e85fc9..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd +++ /dev/null @@ -1,6 +0,0 @@ -extends Node - -signal callback - -func execute(_content): - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd deleted file mode 100644 index 66e85fc9..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd +++ /dev/null @@ -1,6 +0,0 @@ -extends Node - -signal callback - -func execute(_content): - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd deleted file mode 100644 index 66e85fc9..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd +++ /dev/null @@ -1,6 +0,0 @@ -extends Node - -signal callback - -func execute(_content): - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd index a2f5651c..2e1c8a8d 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd @@ -2,6 +2,7 @@ extends Node signal callback + func execute(_content): get_tree().paused = false emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd index a2f5651c..68c826a8 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd @@ -1,7 +1,10 @@ extends Node +# Handles the stepping of the simulation +# Unpause the application to run a step of _physics_process signal callback + func execute(_content): get_tree().paused = false 
emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd b/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd deleted file mode 100644 index e3bfaaba..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd +++ /dev/null @@ -1,5 +0,0 @@ -extends Node - - -func execute(content): - print(content) diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd deleted file mode 100644 index 87b29ae6..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd +++ /dev/null @@ -1,66 +0,0 @@ -extends Node -class_name Agent - - -class Actions: - var name: String - var dist: String - var available: Array = [] - var forward: float = 0.0 - var move_right: float = 0.0 - var turn_right: float = 0.0 - - func set_action(step_action: Array) -> void: - pass - - -class DiscreteActions: - extends Actions - - func set_action(step_action: Array) -> void: - var istep_action: int = int(step_action[0]) - forward = 0.0 - move_right = 0.0 - turn_right = 0.0 - - match available[istep_action]: - "move_foward": - forward = 1.0 - "move_backward": - forward = -1.0 - "move_left": - move_right = 1.0 - "move_right": - move_right = -1.0 - "turn_right": - turn_right = 1.0 - "turn_left": - turn_right = -1.0 - _: - print("Invalid action.") - - -class ContinuousActions: - extends Actions - - func set_action(step_action: Array) -> void: - for i in range(len(step_action)): - match available[i]: - "move_forward_backward": - forward = step_action[i] - "move_left_right": - move_right = step_action[i] - "turn_left_right": - turn_right = step_action[i] - _: - print("Invalid action.") - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. 
-func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd deleted file mode 100644 index e08925d4..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd +++ /dev/null @@ -1,11 +0,0 @@ -extends Node - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. -func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd deleted file mode 100644 index e08925d4..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd +++ /dev/null @@ -1,11 +0,0 @@ -extends Node - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. -func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd deleted file mode 100644 index e08925d4..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd +++ /dev/null @@ -1,11 +0,0 @@ -extends Node - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. 
-func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulator.gd b/integrations/Godot/simenv-godot/SimEnv/Simulator.gd index 74cc1a7b..ef11c5f6 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Simulator.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Simulator.gd @@ -1,13 +1,20 @@ +class_name Simulator extends Node +# Manage the global simulation process +# Get data from TCP client and call the command dispatch +# +# The core function of this class is to decode the data into json +# and send this json data to the commands const HOST : String = "127.0.0.1" const PORT : int = 55000 const RECONNECT_TIMEOUT: float = 3.0 +var agent + var _client : Client = Client.new() var _command : Command = Command.new() -var agent func _ready() -> void: _client.connect("connected", _handle_client_connected) @@ -22,13 +29,16 @@ func _ready() -> void: _command.load_commands() _client.connect_to_host(HOST, PORT) + func _connect_after_timeout(timeout: float) -> void: await get_tree().create_timer(timeout).timeout _client.connect_to_host(HOST, PORT) + func _handle_client_connected() -> void: print("Client connected to server.") + func _handle_client_data(data: PackedByteArray) -> void: var str_data : String = data.get_string_from_utf8() @@ -44,14 +54,17 @@ func _handle_client_data(data: PackedByteArray) -> void: else: print("Error parsing data.") + func _handle_client_disconnected() -> void: print("Client disconnected from server.") _connect_after_timeout(RECONNECT_TIMEOUT) + func _handle_client_error() -> void: print("Client error.") _connect_after_timeout(RECONNECT_TIMEOUT) + func _handle_callback(callback_data: PackedByteArray) -> void: print("Sending callback.") _client.send(callback_data) diff --git a/integrations/Godot/simenv-godot/project.godot b/integrations/Godot/simenv-godot/project.godot index efc1f3ac..ee265053 100644 --- a/integrations/Godot/simenv-godot/project.godot +++ b/integrations/Godot/simenv-godot/project.godot @@ -12,22 +12,34 @@ 
_global_script_classes=[{ "base": "Node", "class": &"Agent", "language": &"GDScript", -"path": "res://SimEnv/Simulation/Agent.gd" +"path": "res://SimEnv/RLAgents/agent.gd" +}, { +"base": "Node", +"class": &"AgentManager", +"language": &"GDScript", +"path": "res://SimEnv/RLAgents/agent_manager.gd" }, { "base": "Node", "class": &"Client", "language": &"GDScript", -"path": "res://SimEnv/Bridge/Client.gd" +"path": "res://SimEnv/Bridge/client.gd" }, { "base": "Node", "class": &"Command", "language": &"GDScript", -"path": "res://SimEnv/Bridge/Command.gd" +"path": "res://SimEnv/Bridge/command.gd" +}, { +"base": "Node", +"class": &"Simulator", +"language": &"GDScript", +"path": "res://SimEnv/simulator.gd" }] _global_script_class_icons={ "Agent": "", +"AgentManager": "", "Client": "", -"Command": "" +"Command": "", +"Simulator": "" } [application]