diff --git a/docs/envs/ported/sokoban.md b/docs/envs/ported/sokoban.md index f30127d6..efa7f9a0 100644 --- a/docs/envs/ported/sokoban.md +++ b/docs/envs/ported/sokoban.md @@ -1,6 +1,6 @@ # Sokoban -This family of environments is ported to MiniHack from [NetHack](https://github.com/heiner/nle), levels taken directly from Sokoban minigame inside NetHack, excluding monsters and items. The goal is to push boulder's to goal locations (pits or wholes). +This family of environments is ported to MiniHack from [NetHack](https://github.com/heiner/nle), levels taken directly from Sokoban minigame inside NetHack, excluding monsters and items. The goal is to push boulders to goal locations (pits or holes). Original dat file can be seen [here](https://github.com/heiner/nle/blob/main/dat/sokoban.des). and corresponding solution from [NetHack Wiki](https://nethackwiki.com/wiki/Sokoban). @@ -9,19 +9,33 @@ An example of Sokoban level ported into MiniHack. ![](../imgs/sokoban3b.png) +## Available Actions +- Movement in 8 directions +- OPEN +- EAT +- PICKUP + + ## Reward -The agent receives a reward of +1 for reaching the stairs down and +0.1 for filling each pit. +The agent receives a reward of +1 for reaching the stairs down and +0.1 for filling each pit. Additionally, a small time penalty of -0.001 is applied at each step to encourage efficient solutions. 
+ +## Game Mechanics +- The agent must navigate through the level, pushing boulders to fill pits +- Stepping into a pit results in death and the end of the episode +- Episodes are limited to 2000 steps by default +- The game is considered successfully completed when all pits are filled and the agent reaches the stairs + ## All Environments | Name | Capability | | ----------------------- | ---------- | -| `MiniHack-Sokoban1a-v0` | Planning | -| `MiniHack-Sokoban1b-v0` | Planning | -| `MiniHack-Sokoban2a-v0` | Planning | -| `MiniHack-Sokoban2b-v0` | Planning | -| `MiniHack-Sokoban3a-v0` | Planning | -| `MiniHack-Sokoban3b-v0` | Planning | -| `MiniHack-Sokoban4a-v0` | Planning | -| `MiniHack-Sokoban4b-v0` | Planning | +| `MiniHack-Sokoban1a-v1` | Planning | +| `MiniHack-Sokoban1b-v1` | Planning | +| `MiniHack-Sokoban2a-v1` | Planning | +| `MiniHack-Sokoban2b-v1` | Planning | +| `MiniHack-Sokoban3a-v1` | Planning | +| `MiniHack-Sokoban3b-v1` | Planning | +| `MiniHack-Sokoban4a-v1` | Planning | +| `MiniHack-Sokoban4b-v1` | Planning | diff --git a/minihack/dat/soko1a.des b/minihack/dat/soko1a.des index bb947ca8..c58376bd 100644 --- a/minihack/dat/soko1a.des +++ b/minihack/dat/soko1a.des @@ -47,3 +47,9 @@ TRAP:"pit",(04,10) TRAP:"pit",(05,10) TRAP:"pit",(06,10) TRAP:"pit",(07,10) + +# Random objects +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/dat/soko1b.des b/minihack/dat/soko1b.des index 92ff084d..7ca48041 100644 --- a/minihack/dat/soko1b.des +++ b/minihack/dat/soko1b.des @@ -48,3 +48,9 @@ TRAP:"pit",(03,08) TRAP:"pit",(04,08) TRAP:"pit",(05,08) TRAP:"pit",(06,08) + +# Random objects +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/dat/soko2a.des b/minihack/dat/soko2a.des index 5b9a86b3..16261fb5 100644 --- a/minihack/dat/soko2a.des +++ b/minihack/dat/soko2a.des @@ -56,3 +56,9 @@ TRAP:"pit",(20,10) 
TRAP:"pit",(21,10) TRAP:"pit",(22,10) TRAP:"pit",(23,10) + +# Random objects +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/dat/soko2b.des b/minihack/dat/soko2b.des index 09276628..674b0a3b 100644 --- a/minihack/dat/soko2b.des +++ b/minihack/dat/soko2b.des @@ -64,3 +64,9 @@ TRAP:"pit",(23,10) TRAP:"pit",(24,10) TRAP:"pit",(25,10) TRAP:"pit",(26,10) + +# Random objects +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/dat/soko3a.des b/minihack/dat/soko3a.des index 4670a20b..e98d0998 100644 --- a/minihack/dat/soko3a.des +++ b/minihack/dat/soko3a.des @@ -53,3 +53,9 @@ TRAP:"pit",(14,09) TRAP:"pit",(15,09) TRAP:"pit",(16,09) TRAP:"pit",(17,09) + +# Random objects +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/dat/soko3b.des b/minihack/dat/soko3b.des index 924c70fd..d9add4a0 100644 --- a/minihack/dat/soko3b.des +++ b/minihack/dat/soko3b.des @@ -55,3 +55,8 @@ TRAP:"pit",(15,11) TRAP:"pit",(16,11) TRAP:"pit",(17,11) +# Random objects +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/dat/soko4a.des b/minihack/dat/soko4a.des index 41f4f51c..8e4ee5da 100644 --- a/minihack/dat/soko4a.des +++ b/minihack/dat/soko4a.des @@ -76,3 +76,9 @@ DOOR:closed,(23,13) DOOR:closed,(17,11) DOOR:closed,(17,13) DOOR:closed,(17,15) + +# Random objects +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/dat/soko4b.des b/minihack/dat/soko4b.des index 9cc7073c..1387153d 100644 --- a/minihack/dat/soko4b.des +++ b/minihack/dat/soko4b.des @@ -77,4 +77,10 @@ TRAP:"pit",(22,01) DOOR:closed,(23,12) DOOR:closed,(17,10) DOOR:closed,(17,12) -DOOR:closed,(17,14) \ No newline at end of file +DOOR:closed,(17,14) + +# Random objects 
+OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random +OBJECT:'%',random \ No newline at end of file diff --git a/minihack/envs/sokoban.py b/minihack/envs/sokoban.py index bbf4949a..ff05b2d2 100644 --- a/minihack/envs/sokoban.py +++ b/minihack/envs/sokoban.py @@ -8,8 +8,8 @@ MOVE_ACTIONS = tuple(nethack.CompassDirection) NAVIGATE_ACTIONS = MOVE_ACTIONS + ( nethack.Command.OPEN, - nethack.Command.KICK, - nethack.Command.SEARCH, + nethack.Command.EAT, + nethack.Command.PICKUP, ) @@ -18,16 +18,16 @@ def __init__(self, *args, **kwargs): kwargs["max_episode_steps"] = kwargs.pop("max_episode_steps", 2000) kwargs["actions"] = kwargs.pop("actions", NAVIGATE_ACTIONS) - self._time_penalty = kwargs.pop("penalty_time", 0) + self._time_penalty = kwargs.pop("penalty_time", -0.001) self._reward_shaping_coefficient = kwargs.pop( - "reward_shaping_coefficient", 0 + "reward_shaping_coefficient", 0.1 ) super().__init__(*args, **kwargs) - def step(self, action: int): - self._current_pits = self._object_positions(self.last_observation, "^") - return super().step(action) + @property + def current_pits(self): + return self._object_positions(self.last_observation, "^") def _is_episode_end(self, observation): result = super()._is_episode_end(observation) @@ -38,8 +38,8 @@ def _is_episode_end(self, observation): return self.StepStatus.RUNNING # stepping into a pit should result in death - agent_pos = list(self._object_positions(observation, "@"))[0] - if any([agent_pos == pos for pos in self._current_pits]): + agent_pos = next((pos for pos in self._object_positions(observation, "@")), None) + if any([agent_pos == pos for pos in self.current_pits]): return self.StepStatus.DEATH return result @@ -68,89 +68,73 @@ def _object_positions(self, observation, object_char): class MiniHackSokoban1a(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko1a.des", **kwargs) class 
MiniHackSokoban1b(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko1b.des", **kwargs) class MiniHackSokoban2a(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko2a.des", **kwargs) class MiniHackSokoban2b(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko2b.des", **kwargs) class MiniHackSokoban3a(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko3a.des", **kwargs) class MiniHackSokoban3b(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko3b.des", **kwargs) class MiniHackSokoban4a(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko4a.des", **kwargs) class MiniHackSokoban4b(Sokoban): def __init__(self, *args, **kwargs): - kwargs["reward_shaping_coefficient"] = 0.1 - kwargs["penalty_time"] = -0.001 super().__init__(*args, des_file="soko4b.des", **kwargs) register( - id="MiniHack-Sokoban4a-v0", + id="MiniHack-Sokoban4a-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban4a", ) register( - id="MiniHack-Sokoban4b-v0", + id="MiniHack-Sokoban4b-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban4b", ) register( - id="MiniHack-Sokoban3a-v0", + id="MiniHack-Sokoban3a-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban3a", ) register( - id="MiniHack-Sokoban3b-v0", + id="MiniHack-Sokoban3b-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban3b", ) register( - id="MiniHack-Sokoban2a-v0", + 
id="MiniHack-Sokoban2a-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban2a", ) register( - id="MiniHack-Sokoban2b-v0", + id="MiniHack-Sokoban2b-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban2b", ) register( - id="MiniHack-Sokoban1a-v0", + id="MiniHack-Sokoban1a-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban1a", ) register( - id="MiniHack-Sokoban1b-v0", + id="MiniHack-Sokoban1b-v1", entry_point="minihack.envs.sokoban:MiniHackSokoban1b", )