NetHack-LE · samvelyan · Mar 3, 2025 · Feb 13, 2025 · Feb 13, 2025 · Mar 3, 2025
diff --git a/docs/envs/ported/sokoban.md b/docs/envs/ported/sokoban.md
@@ -1,6 +1,6 @@
 # Sokoban
 
-This family of environments is ported to MiniHack from [NetHack](https://github.com/heiner/nle), levels taken directly from Sokoban minigame inside NetHack, excluding monsters and items. The goal is to push boulder's to goal locations (pits or wholes).
+This family of environments is ported to MiniHack from [NetHack](https://github.com/heiner/nle), with levels taken directly from the Sokoban minigame inside NetHack, excluding monsters and items. The goal is to push boulders to goal locations (pits or holes).
 
 Original dat file can be seen [here](https://github.com/heiner/nle/blob/main/dat/sokoban.des).
 and corresponding solution from [NetHack Wiki](https://nethackwiki.com/wiki/Sokoban).
@@ -9,19 +9,33 @@ An example of Sokoban level ported into MiniHack.
 
 ![](../imgs/sokoban3b.png)
 
+## Available Actions
+- Movement in 8 directions
+- OPEN
+- EAT
+- PICKUP
+
+
 ## Reward
 
-The agent receives a reward of +1 for reaching the stairs down and +0.1 for filling each pit.
+The agent receives a reward of +1 for reaching the stairs down and +0.1 for filling each pit. Additionally, a small time penalty of -0.001 is applied at each step to encourage efficient solutions.
+
+## Game Mechanics
+- The agent must navigate through the level, pushing boulders to fill pits
+- Stepping into a pit results in death and the end of the episode
+- Episodes are limited to 2000 steps by default
+- The game is considered successfully completed when all pits are filled and the agent reaches the stairs
+
 
 ## All Environments
 
 | Name                    | Capability |
 | ----------------------- | ---------- |
-| `MiniHack-Sokoban1a-v0` | Planning   |
-| `MiniHack-Sokoban1b-v0` | Planning   |
-| `MiniHack-Sokoban2a-v0` | Planning   |
-| `MiniHack-Sokoban2b-v0` | Planning   |
-| `MiniHack-Sokoban3a-v0` | Planning   |
-| `MiniHack-Sokoban3b-v0` | Planning   |
-| `MiniHack-Sokoban4a-v0` | Planning   |
-| `MiniHack-Sokoban4b-v0` | Planning   |
+| `MiniHack-Sokoban1a-v1` | Planning   |
+| `MiniHack-Sokoban1b-v1` | Planning   |
+| `MiniHack-Sokoban2a-v1` | Planning   |
+| `MiniHack-Sokoban2b-v1` | Planning   |
+| `MiniHack-Sokoban3a-v1` | Planning   |
+| `MiniHack-Sokoban3b-v1` | Planning   |
+| `MiniHack-Sokoban4a-v1` | Planning   |
+| `MiniHack-Sokoban4b-v1` | Planning   |
diff --git a/minihack/dat/soko1a.des b/minihack/dat/soko1a.des
@@ -47,3 +47,9 @@ TRAP:"pit",(04,10)
 TRAP:"pit",(05,10)
 TRAP:"pit",(06,10)
 TRAP:"pit",(07,10)
+
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/dat/soko1b.des b/minihack/dat/soko1b.des
@@ -48,3 +48,9 @@ TRAP:"pit",(03,08)
 TRAP:"pit",(04,08)
 TRAP:"pit",(05,08)
 TRAP:"pit",(06,08)
+
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/dat/soko2a.des b/minihack/dat/soko2a.des
@@ -56,3 +56,9 @@ TRAP:"pit",(20,10)
 TRAP:"pit",(21,10)
 TRAP:"pit",(22,10)
 TRAP:"pit",(23,10)
+
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/dat/soko2b.des b/minihack/dat/soko2b.des
@@ -64,3 +64,9 @@ TRAP:"pit",(23,10)
 TRAP:"pit",(24,10)
 TRAP:"pit",(25,10)
 TRAP:"pit",(26,10)
+
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/dat/soko3a.des b/minihack/dat/soko3a.des
@@ -53,3 +53,9 @@ TRAP:"pit",(14,09)
 TRAP:"pit",(15,09)
 TRAP:"pit",(16,09)
 TRAP:"pit",(17,09)
+
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/dat/soko3b.des b/minihack/dat/soko3b.des
@@ -55,3 +55,8 @@ TRAP:"pit",(15,11)
 TRAP:"pit",(16,11)
 TRAP:"pit",(17,11)
 
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/dat/soko4a.des b/minihack/dat/soko4a.des
@@ -76,3 +76,9 @@ DOOR:closed,(23,13)
 DOOR:closed,(17,11)
 DOOR:closed,(17,13)
 DOOR:closed,(17,15)
+
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/dat/soko4b.des b/minihack/dat/soko4b.des
@@ -77,4 +77,10 @@ TRAP:"pit",(22,01)
 DOOR:closed,(23,12)
 DOOR:closed,(17,10)
 DOOR:closed,(17,12)
-DOOR:closed,(17,14)
+DOOR:closed,(17,14)
+
+# Random objects
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
+OBJECT:'%',random
diff --git a/minihack/envs/sokoban.py b/minihack/envs/sokoban.py
@@ -8,8 +8,8 @@
 MOVE_ACTIONS = tuple(nethack.CompassDirection)
 NAVIGATE_ACTIONS = MOVE_ACTIONS + (
     nethack.Command.OPEN,
-    nethack.Command.KICK,
-    nethack.Command.SEARCH,
+    nethack.Command.EAT,
+    nethack.Command.PICKUP,
 )
 
 
@@ -18,16 +18,16 @@ def __init__(self, *args, **kwargs):
         kwargs["max_episode_steps"] = kwargs.pop("max_episode_steps", 2000)
         kwargs["actions"] = kwargs.pop("actions", NAVIGATE_ACTIONS)
 
-        self._time_penalty = kwargs.pop("penalty_time", 0)
+        self._time_penalty = kwargs.pop("penalty_time", -0.001)
         self._reward_shaping_coefficient = kwargs.pop(
-            "reward_shaping_coefficient", 0
+            "reward_shaping_coefficient", 0.1
         )
 
         super().__init__(*args, **kwargs)
 
-    def step(self, action: int):
-        self._current_pits = self._object_positions(self.last_observation, "^")
-        return super().step(action)
+    @property
+    def current_pits(self):
+        return self._object_positions(self.last_observation, "^")
 
     def _is_episode_end(self, observation):
         result = super()._is_episode_end(observation)
@@ -38,8 +38,8 @@ def _is_episode_end(self, observation):
                 return self.StepStatus.RUNNING
 
         # stepping into a pit should result in death
-        agent_pos = list(self._object_positions(observation, "@"))[0]
-        if any([agent_pos == pos for pos in self._current_pits]):
+        agent_pos = next((pos for pos in self._object_positions(observation, "@")), None)
+        if any([agent_pos == pos for pos in self.current_pits]):
             return self.StepStatus.DEATH
 
         return result
@@ -68,89 +68,73 @@ def _object_positions(self, observation, object_char):
 
 class MiniHackSokoban1a(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko1a.des", **kwargs)
 
 
 class MiniHackSokoban1b(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko1b.des", **kwargs)
 
 
 class MiniHackSokoban2a(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko2a.des", **kwargs)
 
 
 class MiniHackSokoban2b(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko2b.des", **kwargs)
 
 
 class MiniHackSokoban3a(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko3a.des", **kwargs)
 
 
 class MiniHackSokoban3b(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko3b.des", **kwargs)
 
 
 class MiniHackSokoban4a(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko4a.des", **kwargs)
 
 
 class MiniHackSokoban4b(Sokoban):
     def __init__(self, *args, **kwargs):
-        kwargs["reward_shaping_coefficient"] = 0.1
-        kwargs["penalty_time"] = -0.001
         super().__init__(*args, des_file="soko4b.des", **kwargs)
 
 
 register(
-    id="MiniHack-Sokoban4a-v0",
+    id="MiniHack-Sokoban4a-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban4a",
 )
 register(
-    id="MiniHack-Sokoban4b-v0",
+    id="MiniHack-Sokoban4b-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban4b",
 )
 register(
-    id="MiniHack-Sokoban3a-v0",
+    id="MiniHack-Sokoban3a-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban3a",
 )
 register(
-    id="MiniHack-Sokoban3b-v0",
+    id="MiniHack-Sokoban3b-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban3b",
 )
 register(
-    id="MiniHack-Sokoban2a-v0",
+    id="MiniHack-Sokoban2a-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban2a",
 )
 register(
-    id="MiniHack-Sokoban2b-v0",
+    id="MiniHack-Sokoban2b-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban2b",
 )
 register(
-    id="MiniHack-Sokoban1a-v0",
+    id="MiniHack-Sokoban1a-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban1a",
 )
 register(
-    id="MiniHack-Sokoban1b-v0",
+    id="MiniHack-Sokoban1b-v1",
     entry_point="minihack.envs.sokoban:MiniHackSokoban1b",
 )