From ee3072ca1a66f1a7c9fbd1360e164587aa5c2450 Mon Sep 17 00:00:00 2001
From: SWang848 <51240743+SWang848@users.noreply.github.com>
Date: Fri, 23 Aug 2024 15:29:55 -0600
Subject: [PATCH 1/9] Update paper.bib

---
 paper.bib | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/paper.bib b/paper.bib
index 314c6073..0937b493 100644
--- a/paper.bib
+++ b/paper.bib
@@ -232,3 +232,42 @@ @InProceedings{2021NetHack
   url = 	 {https://proceedings.mlr.press/v176/hambro22a.html},
   abstract = 	 {In this report, we summarize the takeaways from the first NeurIPS 2021 NetHack Challenge. Participants were tasked with developing a program or agent that can win (i.e., ’ascend’ in) the popular dungeon-crawler game of NetHack by interacting with the NetHack Learning Environment (NLE), a scalable, procedurally generated, and challenging Gym environment for reinforcement learning (RL). The challenge showcased community-driven progress in AI with many diverse approaches significantly beating the previously best results on NetHack. Furthermore, it served as a direct comparison between neural (e.g., deep RL) and symbolic AI, as well as hybrid systems, demonstrating that on NetHack symbolic bots currently outperform deep RL by a large margin. Lastly, no agent got close to winning the game, illustrating NetHack’s suitability as a long-term benchmark for AI research.}
 }
+
+@article{botvinick2014model,
+  title={Model-based hierarchical reinforcement learning and human action control},
+  author={Botvinick, Matthew and Weinstein, Ari},
+  journal={Philosophical Transactions of the Royal Society B: Biological Sciences},
+  volume={369},
+  number={1655},
+  pages={20130480},
+  year={2014},
+  publisher={The Royal Society}
+}
+
+@inproceedings{bacon2017option,
+  title={The option-critic architecture},
+  author={Bacon, Pierre-Luc and Harb, Jean and Precup, Doina},
+  booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
+  volume={31},
+  number={1},
+  year={2017}
+}
+
+@inproceedings{heess2016learning,
+  title={Learning continuous control policies by stochastic value gradients},
+  author={Heess, Nicolas and Wayne, Greg and Silver, David and Lillicrap, Timothy and Erez, Tom and Tassa, Yuval},
+  booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
+  volume={28},
+  pages={2944--2952},
+  year={2015}
+}
+@inproceedings{nachum2018data,
+  author    = {Nachum, Ofir and Gu, Shixiang and Lee, Honglak and Levine, Sergey},
+  title     = {Data-efficient hierarchical reinforcement learning},
+  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
+  volume    = {31},
+  pages     = {3303--3313},
+  year      = {2018}
+}
+
+

From c2f53279f971d7b7e8e7cff5d1cde25c9639bf30 Mon Sep 17 00:00:00 2001
From: SWang848 <51240743+SWang848@users.noreply.github.com>
Date: Fri, 23 Aug 2024 15:30:35 -0600
Subject: [PATCH 2/9] Update paper.md

---
 paper.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paper.md b/paper.md
index 9542b9c0..e0c22261 100644
--- a/paper.md
+++ b/paper.md
@@ -37,7 +37,7 @@ bibliography: paper.bib
 ---
 
 # Summary
-Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence. Existing methods may struggle when confronted with hierarchical tasks, yet there is a scarcity of suitable environments or benchmarks designed to comprehend how the structure of the underlying hierarchy influence a task difficulty. Our software represents a crucial initial step in the development of tools aimed at addressing research questions related to hierarchical reasoning.
+Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option,@heess2016learning,@nachum2018data], yet there is a scarcity of suitable environments or benchmarks designed to comprehend how the structure of the underlying hierarchy influence a task difficulty. Our software represents a crucial initial step in the development of tools aimed at addressing research questions related to hierarchical reasoning.
 
 We introduce **HierarchyCraft**, a lightweight environment builder designed for creating hierarchical reasoning tasks that do not necessitate feature extraction. This includes tasks containing pixel images, text, sound, or any data requiring deep-learning based feature extraction.
 HierarchyCraft serves a dual purpose by offering a set of pre-defined hierarchical environments and simplifying the process of creating customized hierarchical environments.
@@ -72,7 +72,7 @@ Due to the sparse rewards, exploration difficulty, and long time horizons in thi
 Unfortunately, DreamerV3 required training on an Nvidia V100 GPU for 17 days, gathering around 100 million environmental steps.
 Such **substantial computational resources are inaccessible to most researchers**, impeding the overall progress of research on hierarchical reasoning.
 
-Moreover, although Minecraft has a undeniably complex hierarchical structure, **this underlying hierarchical structures is fixed** and cannot be modified without modding the game, a complex task for researchers.
+Moreover, although Minecraft has an undeniably complex hierarchical structure, **this underlying hierarchical structure is fixed** and cannot be modified without modding the game, a complex task for researchers.
 
 
 
@@ -82,7 +82,7 @@ Moreover, although Minecraft has a undeniably complex hierarchical structure, **
 Crafter [@hafner2022benchmarking] presents a lightweight grid-based 2D environment, with game mechanics akin to Minecraft and poses similar challenges including exploration, representation learning, rewards sparsity and long-term reasoning.
 Although Crafter offers 22 different tasks displayed in \autoref{fig:CrafterRequirements}, the **fixed underlying hierarchical structure** restricts how researchers can investigate the impacts of changes in this structure.
 
-Furthermore, the tasks considered by the authors do not include navigation subtasks (e.g., Find water, look for a cow, wait for a plant to grow, go back to a table...) or certain optional but useful subtasks (e.g., Swords and the skill of dodging arrows contribute to making the task of killing skeletons easier.), leading to abrupt drops in success rates in the hierarchy instead of a more gradual increase in difficulty.
+Furthermore, the tasks considered by the authors do not include navigation subtasks (e.g., find water, look for a cow, wait for a plant to grow, go back to a table...) or certain optional but useful subtasks (e.g., swords and the skill of dodging arrows contribute to making the task of killing skeletons easier), leading to abrupt drops in success rates in the hierarchy instead of a more gradual increase in difficulty.
 
 ![Partial Hierarchical structure of the Crafter environment. Inspired from Figure 4 of [@hafner2022benchmarking]\label{fig:CrafterRequirements}](docs/images/CrafterRequirementsGraph.png){ width=80% }
 
@@ -110,7 +110,7 @@ The arcade learning environment [@ALE] stands as a standard benchmark in reinfor
 HierarchyCraft aims to be a fruitful tool for investigating hierarchical reasoning, focusing on achieving the following four design goals.
 
 ### 1. Hierarchical by design
-The action space of HierarchyCraft environments consists of sub-tasks, referred to as *Transformations*, as opposed to detailed movements and controls. But each *Transformations* has specific requirements to be valid (eg. have enought of an item, be in the right place), and these requirements may necessitate the execution of other *Transformations* first, inherently creating a hierarchical structure in HierarchyCraft environments.
+The action space of HierarchyCraft environments consists of sub-tasks, referred to as *Transformations*, as opposed to detailed movements and controls. But each of *Transformations* has specific requirements to be valid (e.g. have enough of an item, be in the right place), and these requirements may necessitate the execution of other *Transformations* first, inherently creating a hierarchical structure in HierarchyCraft environments.
 
 This concept  is visually represented by the *Requirements graph* depicting the hierarchical relationships within each HierarchyCraft environment.
 The *Requirements graph* is directly constructed from the list of *Transformations* composing the environement, as illustrated in \autoref{fig:TransformationToRequirements}.

From 60996e8f5982f61ede0d2a0449975bf1b2e11bc5 Mon Sep 17 00:00:00 2001
From: SWang848 <51240743+SWang848@users.noreply.github.com>
Date: Fri, 23 Aug 2024 16:07:03 -0600
Subject: [PATCH 3/9] Update paper.md

---
 paper.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paper.md b/paper.md
index e0c22261..3ed44133 100644
--- a/paper.md
+++ b/paper.md
@@ -39,7 +39,7 @@ bibliography: paper.bib
 # Summary
 Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option,@heess2016learning,@nachum2018data], yet there is a scarcity of suitable environments or benchmarks designed to comprehend how the structure of the underlying hierarchy influence a task difficulty. Our software represents a crucial initial step in the development of tools aimed at addressing research questions related to hierarchical reasoning.
 
-We introduce **HierarchyCraft**, a lightweight environment builder designed for creating hierarchical reasoning tasks that do not necessitate feature extraction. This includes tasks containing pixel images, text, sound, or any data requiring deep-learning based feature extraction.
+We introduce **HierarchyCraft**, a lightweight environment builder designed for creating hierarchical reasoning tasks that do not necessitate feature extraction. This includes tasks involving pixel images, text, sound, or other data types where deep learning-based feature extraction is commonly employed.
 HierarchyCraft serves a dual purpose by offering a set of pre-defined hierarchical environments and simplifying the process of creating customized hierarchical environments.
 
 
@@ -120,7 +120,7 @@ Requirements graphs should be viewed as a generalization of previously observed
 ![How sub-tasks build a hierarchical structure.\label{fig:TransformationToRequirements}](docs/images/TransformationToRequirementsLarge.png){ width=75% }
 
 ### 2. No feature extraction needed
-In contrast to benchmarks that yield grids, pixel arrays, text, or sound, HierarchyCraft directly provides a low-dimensional latent representation that does not require learning, as depicted in \autoref{fig:HierarchyCraftState}.
+In contrast to benchmarks that yield grids, pixel arrays, text, or sound, HierarchyCraft directly provides a low-dimensional representation that does not require the further features extraction, as depicted in Figure \autoref{fig:HierarchyCraftState}.
 This not only saves computational time but also enables researchers to concentrate on hierarchical reasoning while additionally  allowing for the utilization of classical planning frameworks such as PDDL [@PDDL] or ANML [@ANML].
 
 ![HierarchyCraft state is already a compact representation.\label{fig:HierarchyCraftState}](docs/images/HierarchyCraftStateLarge.png){ width=80% }

From f966d00994e569ac7424d52e591a8a713b6e1d74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Math=C3=AFs=20F=C3=A9d=C3=A9rico?= <mathfederico@gmail.com>
Date: Sun, 12 Jan 2025 17:11:24 +0100
Subject: [PATCH 4/9] =?UTF-8?q?=F0=9F=93=9D=20Update=20paper=20with=20sugg?=
 =?UTF-8?q?estions=20of=20@metaylor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paper.md | 57 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/paper.md b/paper.md
index 3ed44133..c148aa97 100644
--- a/paper.md
+++ b/paper.md
@@ -1,5 +1,5 @@
 ---
-title: 'Benchmarking Hierarchical Reasoning with HierarchyCraft'
+title: 'HierarchyCraft: A Benchmark Builder for Hierarchical Reasoning'
 tags:
   - Python
   - Hierarchy
@@ -20,7 +20,7 @@ authors:
     affiliation: "1"
   - name: Yuxuan Li
     orcid: 0000-0001-5522-312X
-    affiliation: "1,3"
+    affiliation: "1, 3"
   - name: Matthew E. Taylor
     orcid: 0000-0001-8946-0211
     affiliation: "1, 3"
@@ -36,9 +36,10 @@ bibliography: paper.bib
 
 ---
 
+
 # Summary
-Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option,@heess2016learning,@nachum2018data], yet there is a scarcity of suitable environments or benchmarks designed to comprehend how the structure of the underlying hierarchy influence a task difficulty. Our software represents a crucial initial step in the development of tools aimed at addressing research questions related to hierarchical reasoning.
 
+Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option,@heess2016learning,@nachum2018data], yet there is a scarcity of suitable environments or benchmarks designed to comprehend how the structure of the underlying hierarchy influence a task difficulty. Our software represents an important initial step in the development of tools aimed at addressing research questions related to hierarchical reasoning.
 We introduce **HierarchyCraft**, a lightweight environment builder designed for creating hierarchical reasoning tasks that do not necessitate feature extraction. This includes tasks involving pixel images, text, sound, or other data types where deep learning-based feature extraction is commonly employed.
 HierarchyCraft serves a dual purpose by offering a set of pre-defined hierarchical environments and simplifying the process of creating customized hierarchical environments.
 
@@ -47,14 +48,15 @@ HierarchyCraft serves a dual purpose by offering a set of pre-defined hierarchic
 
 
 # Statement of need
+
 HierarchyCraft is designed as a user-friendly Python library for constructing environments tailored to the study of hierarchical reasoning  in the contexts of reinforcement learning, classical planning, and program synthesis as displayed in \autoref{fig:HierachyCraft_domain_position}.
 
 Analysis and quantification of the impacts of diverse hierarchical structures on learning agents is essential for advancing hierarchical reasoning.
 However, current hierarchical benchmarks often limit themselves to a single hierarchical structure per benchmark, and present challenges not only due to this inherent hierarchical structure but also because of the necessary representation learning to interpret the inputs.
 
 We argue that arbitrary hierarchical complexity can emerge from simple rules without the need for learning a representation.
-To the best of our knowledge, no general frameworks currently exist for constructing environments dedicated to studying the hierarchical structure itself, underscoring the necessity for the development of tools like HierarchyCraft.
-We compare five particularly related benchmarks to HierarchyCraft.
+To the best of our knowledge, no general frameworks currently exist for constructing environments dedicated to studying the hierarchical structure itself. We next highlight five related benchmarks, underscoring the necessity for the development of a tool like HierarchyCraft.
+
 
 ### GridWorld
 
@@ -64,31 +66,24 @@ Minigrid [@minigrid] is a user-friendly Python library that not only implements
 
 ![Example of Minigrid environments hierarchical structures and their relationships. There is only a few possible sub-tasks and most of them are navigation tasks (in green).\label{fig:MinigridHierarchies}](docs/images/MinigridHierarchies.png){ width=100% }
 
+
 ### Minecraft
 
 An exemplary instance of a hierarchical task is the collection of diamonds in the popular video game Minecraft, as showcased in the MineRL competition [@guss2021minerl2020], where hierarchical reinforcement learning agents have dominated the leaderboard[@milani2020minerl2019].
-
-Due to the sparse rewards, exploration difficulty, and long time horizons in this procedurally generated sandbox environment, DreamerV3 [@dreamerv3] recently became the first algorithm to successfully collect diamonds in Minecraft without prior training or knowledge.
-Unfortunately, DreamerV3 required training on an Nvidia V100 GPU for 17 days, gathering around 100 million environmental steps.
-Such **substantial computational resources are inaccessible to most researchers**, impeding the overall progress of research on hierarchical reasoning.
-
+Due to sparse rewards, the difficulty of exploration, and long time horizons in this procedurally generated sandbox environment, DreamerV3 [@dreamerv3] recently became the first algorithm to successfully collect diamonds in Minecraft without prior training or knowledge.
+Unfortunately, DreamerV3 required training on an Nvidia V100 GPU for 17 days, gathering roughly 100 million environmental steps.
+Such **substantial computational resources are unavailable to many researchers**, impeding the overall progress of research on hierarchical reasoning.
 Moreover, although Minecraft has an undeniably complex hierarchical structure, **this underlying hierarchical structure is fixed** and cannot be modified without modding the game, a complex task for researchers.
 
 
-
-
 ### Crafter
 
 Crafter [@hafner2022benchmarking] presents a lightweight grid-based 2D environment, with game mechanics akin to Minecraft and poses similar challenges including exploration, representation learning, rewards sparsity and long-term reasoning.
-Although Crafter offers 22 different tasks displayed in \autoref{fig:CrafterRequirements}, the **fixed underlying hierarchical structure** restricts how researchers can investigate the impacts of changes in this structure.
-
-Furthermore, the tasks considered by the authors do not include navigation subtasks (e.g., find water, look for a cow, wait for a plant to grow, go back to a table...) or certain optional but useful subtasks (e.g., swords and the skill of dodging arrows contribute to making the task of killing skeletons easier), leading to abrupt drops in success rates in the hierarchy instead of a more gradual increase in difficulty.
-
+Although Crafter offers 22 different tasks displayed in \autoref{fig:CrafterRequirements}, the **underlying hierarchical structure is fixed**, restricting how researchers can investigate the impacts of changes to the hierarchy.
+Furthermore, the tasks considered by the authors do not include navigation subtasks (e.g., find water, look for a cow, wait for a plant to grow, go back to a table, etc.) or certain optional but useful subtasks (e.g., swords and the skill of dodging arrows contribute to making the task of killing skeletons easier), leading to abrupt drops in success rates in the hierarchy instead of a more gradual increase in difficulty.
 ![Partial Hierarchical structure of the Crafter environment. Inspired from Figure 4 of [@hafner2022benchmarking]\label{fig:CrafterRequirements}](docs/images/CrafterRequirementsGraph.png){ width=80% }
 
 
-
-
 ### PDDLGym
 
 PDDLGym [@PDDLgym] is a Python library that automatically constructs Gym environments from Planning Domain Definition Language (PDDL) domains and problems. PDDL [@PDDL] functions as a problem specification language, facilitating the comparison of different symbolic planners. However, constructing PDDL domains and problems with a hierarchical structure is challenging and time-consuming, especially for researchers unfamiliar with PDDL-like languages.
@@ -100,38 +95,42 @@ Additionally, PDDLGym is **compatible only with PDDL1** and does not support num
 The NetHack learning environment [@kuttler2020nethack] is based on the game NetHack, where the observation is a grid composed of hundreds of possible symbols.
 Large numbers of items are randomly placed in each level, making NetHack extremely complex and challenging. In fact, NetHack is **too complex for agents to learn**, it requires many environment steps for agents to acquire domain-specific knowledge. 10B steps were required for the NeurIPS 2021 NetHack challenge [@2021NetHack], making it impractically long for a benchmark. Moreover, the NetHack game also has a **fixed underlying hierarchy** that cannot be easily modified. -->
 
-### Arcade Learning Environment (Atari)
-
-The arcade learning environment [@ALE] stands as a standard benchmark in reinforcement learning, encompassing over 55 Atari games. However, **only a few of these games, such as Montezuma's Revenge and Pitfall, necessitate hierarchical reasoning**. Each Atari games has a **fixed hierarchy that cannot be modified** and agents **demand substantial computational resources** to extract relevant features from pixels or memory, significantly slowing down experiments.
 
+### Arcade Learning Environment (Atari)
+The arcade learning environment [@ALE] stands as a standard benchmark in reinforcement learning, encompassing over 55 Atari games. However, **only a few of these games, such as Montezuma’s Revenge and Pitfall, necessitate hierarchical reasoning**. Each Atari game has a **fixed hierarchy that cannot be modified** and agents **demand substantial computational resources** to extract relevant features from pixels or memory, significantly slowing down experiments.
 
 ## Design goals
 
 HierarchyCraft aims to be a fruitful tool for investigating hierarchical reasoning, focusing on achieving the following four design goals.
 
-### 1. Hierarchical by design
-The action space of HierarchyCraft environments consists of sub-tasks, referred to as *Transformations*, as opposed to detailed movements and controls. But each of *Transformations* has specific requirements to be valid (e.g. have enough of an item, be in the right place), and these requirements may necessitate the execution of other *Transformations* first, inherently creating a hierarchical structure in HierarchyCraft environments.
 
-This concept  is visually represented by the *Requirements graph* depicting the hierarchical relationships within each HierarchyCraft environment.
-The *Requirements graph* is directly constructed from the list of *Transformations* composing the environement, as illustrated in \autoref{fig:TransformationToRequirements}.
+### 1. Hierarchical by design
 
+The action space of HierarchyCraft environments consists of sub-tasks, referred to as *Transformations*, as opposed to detailed movements and controls. However, each *Transformation* has specific requirements to be valid (e.g., have enough of an item, be in the right place), and these requirements may necessitate the execution of other *Transformations* first, inherently creating a hierarchical structure in HierarchyCraft environments.
+This concept is visually represented by the _Requirements graph_ depicting the hierarchical relationships within each HierarchyCraft environment.
+The _Requirements graph_ is directly constructed from the list of *Transformations* composing the environment, as illustrated in \autoref{fig:TransformationToRequirements}.
 Requirements graphs should be viewed as a generalization of previously observed graphical representations from related works, including  \autoref{fig:CrafterRequirements} and \autoref{fig:MinigridHierarchies}.
 
 ![How sub-tasks build a hierarchical structure.\label{fig:TransformationToRequirements}](docs/images/TransformationToRequirementsLarge.png){ width=75% }
 
+
 ### 2. No feature extraction needed
-In contrast to benchmarks that yield grids, pixel arrays, text, or sound, HierarchyCraft directly provides a low-dimensional representation that does not require the further features extraction, as depicted in Figure \autoref{fig:HierarchyCraftState}.
-This not only saves computational time but also enables researchers to concentrate on hierarchical reasoning while additionally  allowing for the utilization of classical planning frameworks such as PDDL [@PDDL] or ANML [@ANML].
 
+In contrast to benchmarks that yield grids, pixel arrays, text, or sound, HierarchyCraft directly provides a low-dimensional representation that does not require further feature extraction, as depicted in \autoref{fig:HierarchyCraftState}.
+This not only saves computational time but also enables researchers to concentrate on hierarchical reasoning while additionally leveraging classical planning frameworks such as PDDL [@PDDL] or ANML [@ANML].
 ![HierarchyCraft state is already a compact representation.\label{fig:HierarchyCraftState}](docs/images/HierarchyCraftStateLarge.png){ width=80% }
 
+
 ### 3. Easy to use and customize
+
 HierarchyCraft is a versatile framework enabling the creation of diverse hierarchical environments.
 The library is designed to be simple and flexible, allowing researchers to define their own hierarchical environments with detailed guidance provided in the documentation.
 To showcase the range of environments possible within HierarchyCraft, multiple examples are provided.
 
-### 4. Compatible with domains frameworks
-HierarchyCraft environments are directly compatible with both reinforcement learning through Gymnasium [@gymnasium] and planning through the Unified Planning Framework [@UPF] (see \autoref{fig:HierarchyCraft-pipeline}).
+
+### 4. Compatible with multiple frameworks
+
+HierarchyCraft environments are directly compatible with both reinforcement learning through OpenAI Gym [@gym] and planning through the Unified Planning Framework [@UPF] (see \autoref{fig:HierarchyCraft-pipeline}).
 This compatibility facilitates usage by both the reinforcement learning and planning communities.
 
 ![HierarchyCraft pipeline into different representations.\label{fig:HierarchyCraft-pipeline}](docs/images/HierarchyCraft_pipeline.png){ width=80% }

From e464860083e4ab2e872a8c456fc25c6c25189342 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Math=C3=AFs=20F=C3=A9d=C3=A9rico?= <mathfederico@gmail.com>
Date: Sun, 12 Jan 2025 18:31:04 +0100
Subject: [PATCH 5/9] =?UTF-8?q?=F0=9F=93=9D=20Update=20TransformationToReq?=
 =?UTF-8?q?uirements=20in=20the=20README=20to=20the=20smaller=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b804b5a9..f8599e7f 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ The action space of HierarchyCraft environments consists of sub-tasks, referred
 This concept is visually represented by the *Requirements graph* depicting the hierarchical relationships within each HierarchyCraft environment.
 The *Requirements graph* is directly constructed from the list of *Transformations* composing the environement.
 
-![](docs/images/TransformationToRequirementsLarge.png)
+![](docs/images/TransformationToRequirements.png)
 
 More details about requirements graph can be found in the documentation at [`hcraft.requirements`](https://irll.github.io/HierarchyCraft/hcraft/requirements.html) and example of requirements graph for some HierarchyCraft environements can be found in [`hcraft.examples`](https://irll.github.io/HierarchyCraft/hcraft/examples.html).
 

From b5cc1835300acd7be6e118b54d5bca2098ada773 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Math=C3=AFs=20F=C3=A9d=C3=A9rico?= <mathfederico@gmail.com>
Date: Sun, 12 Jan 2025 18:47:38 +0100
Subject: [PATCH 6/9] =?UTF-8?q?=F0=9F=92=9A=20Fix=20docs=20CI?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/pydoc-github-pages.yml | 35 ++++++++++--------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/pydoc-github-pages.yml b/.github/workflows/pydoc-github-pages.yml
index c3b91b3b..4e180c05 100644
--- a/.github/workflows/pydoc-github-pages.yml
+++ b/.github/workflows/pydoc-github-pages.yml
@@ -19,26 +19,21 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
-        with:
-          python-version: '3.11'
-
-    - uses: actions/checkout@v2
-    - name: Install uv
-      uses: astral-sh/setup-uv@v5
-    - name: Set up venv with Python ${{ matrix.python-version }}
-      run: |
-        uv venv --python ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        uv sync --extra docs --extra gym --extra gui --extra planning --extra htmlvis
-    - name: Make documentation
-    - run: |
-        uv run pdoc -d google -t docs/template --logo https://irll.net/irll-logo.png --logo-link https://irll.github.io/HierarchyCraft/hcraft.html -o docs/build --math --no-search hcraft
-    - name: Upload static documentation artifact
-      - uses: actions/upload-pages-artifact@v1
-        with:
-          path: docs/build
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Set up venv with Python ${{ matrix.python-version }}  # NOTE(review): no strategy.matrix is visible in this hunk; confirm python-version is defined
+        run: |
+          uv venv --python ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          uv sync --extra docs --extra gym --extra gui --extra planning --extra htmlvis
+      - name: Make documentation
+        run: |
+          uv run pdoc -d google -t docs/template --logo https://irll.net/irll-logo.png --logo-link https://irll.github.io/HierarchyCraft/hcraft.html -o docs/build --math --no-search hcraft
+      - name: Upload static documentation artifact
+        uses: actions/upload-pages-artifact@v1
+        with:
+          path: docs/build
 
   # Deploy the artifact to GitHub pages.
   # This is a separate job so that only actions/deploy-pages has the necessary permissions.

From e380ec38021193c5824ea8ad680f90aaffcfe9ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Math=C3=AFs=20F=C3=A9d=C3=A9rico?= <mathfederico@gmail.com>
Date: Sun, 12 Jan 2025 19:00:10 +0100
Subject: [PATCH 7/9] =?UTF-8?q?=F0=9F=93=9D=20Update=20minigrid=20ref=20in?=
 =?UTF-8?q?=20paper?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paper.bib | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/paper.bib b/paper.bib
index 0937b493..d479dce9 100644
--- a/paper.bib
+++ b/paper.bib
@@ -34,11 +34,14 @@ @article{sutton1999between
   publisher={Elsevier}
 }
 
-@software{minigrid,
-  author = {Chevalier-Boisvert, Maxime and Willems, Lucas and Pal, Suman},
-  title = {Minimalistic Gridworld Environment for Gymnasium},
-  url = {https://github.com/Farama-Foundation/Minigrid},
-  year = {2018},
+
+@inproceedings{minigrid,
+  author       = {Chevalier-Boisvert, Maxime and Dai, Bolun and Towers, Mark and Perez-Vicente, Rodrigo and Willems, Lucas and Lahlou, Salem and Pal, Suman and Castro, Pablo Samuel and Terry, Jordan},
+  title        = {{Minigrid} {\&} {Miniworld}: Modular {\&} Customizable Reinforcement Learning Environments for Goal-Oriented Tasks},
+  booktitle    = {Advances in Neural Information Processing Systems 36, New Orleans, LA, USA},
+  month        = dec,
+  year         = {2023},
+  url          = {https://github.com/Farama-Foundation/Minigrid},
+}
 
 @article{machado2018revisiting,

From 103e8f475f88bf1b38d1e64ed8731bb139a9a346 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Math=C3=AFs=20F=C3=A9d=C3=A9rico?= <mathfederico@gmail.com>
Date: Sun, 12 Jan 2025 19:00:33 +0100
Subject: [PATCH 8/9] =?UTF-8?q?=F0=9F=93=9D=20Update=20paper=20content=20w?=
 =?UTF-8?q?ith=20last=20issues?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paper.md | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/paper.md b/paper.md
index c148aa97..9ae4267e 100644
--- a/paper.md
+++ b/paper.md
@@ -39,8 +39,12 @@ bibliography: paper.bib
 
 # Summary
 
-Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option,@heess2016learning,@nachum2018data], yet there is a scarcity of suitable environments or benchmarks designed to comprehend how the structure of the underlying hierarchy influence a task difficulty. Our software represents an important initial step in the development of tools aimed at addressing research questions related to hierarchical reasoning.
-We introduce **HierarchyCraft**, a lightweight environment builder designed for creating hierarchical reasoning tasks that do not necessitate feature extraction. This includes tasks involving pixel images, text, sound, or other data types where deep learning-based feature extraction is commonly employed.
+Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option,@heess2016learning,@nachum2018data], yet despite the importance of understanding how the structure of an underlying hierarchy affects task difficulty, there is a lack of suitable environments or benchmarks to facilitate this exploration.
+
+We introduce **HierarchyCraft** a software package that enables reasearchers to create custom environments based only on their hierarchical structures. This represents an important first step in developing tools for studying hierarchical reasoning and .
+
+**HierarchyCraft** excludes unstructured data, such as pixel images, text, sound, or other data types typically processed using deep learning techniques, for two key reasons. First, this focus allows researchers to isolate and study hierarchical behavior without the complexity introduced by feature extraction, which is straightforward in this context compared to most reinforcement learning environments. The primary source of difficulty lies in the hierarchical structure of the actions to be performed. Second, by excluding these data types, HierarchyCraft ensures that the environments are well-suited for classical planning algorithms, enabling meaningful comparisons between hierarchical decision-making in classical planning and reinforcement learning.
+
 HierarchyCraft serves a dual purpose by offering a set of pre-defined hierarchical environments and simplifying the process of creating customized hierarchical environments.
 
 
@@ -54,6 +58,16 @@ HierarchyCraft is designed as a user-friendly Python library for constructing en
 Analysis and quantification of the impacts of diverse hierarchical structures on learning agents is essential for advancing hierarchical reasoning.
 However, current hierarchical benchmarks often limit themselves to a single hierarchical structure per benchmark, and present challenges not only due to this inherent hierarchical structure but also because of the necessary representation learning to interpret the inputs.
 
+HierarchyCraft is not a benchmark itself, but a benchmark builder 
+designed to facilitate the study of how different hierarchical structures influence 
+the performance of classical planners and reinforcement learning algorithms. 
+It includes several 
+[examples](https://github.com/IRLL/HierarchyCraft/tree/master/src/hcraft/examples), 
+detailed in 
+[the package documentation](https://irll.github.io/HierarchyCraft/hcraft/examples),
+that can serve as initial benchmarks — those example includes basic parametrised hierarchical structure builders (Random, Recursive, Tower), and fixed one imitating other environments (MineHCraft imitating Minecraft tasks, MiniHCraft imitating Minigrid tasks).
+However, researchers are encouraged to create their own custom hierarchical environments to further explore hierarchical structures of their choice.
+
 We argue that arbitrary hierarchical complexity can emerge from simple rules without the need for learning a representation.
 To the best of our knowledge, no general frameworks currently exist for constructing environments dedicated to studying the hierarchical structure itself. We next highlight five related benchmarks, underscoring the necessity for the development of a tool like HierarchyCraft.
 
@@ -80,8 +94,10 @@ Moreover, although Minecraft has an undeniably complex hierarchical structure, *
 
 Crafter [@hafner2022benchmarking] presents a lightweight grid-based 2D environment, with game mechanics akin to Minecraft and poses similar challenges including exploration, representation learning, rewards sparsity and long-term reasoning.
 Although Crafter offers 22 different tasks displayed in \autoref{fig:CrafterRequirements}, the **underlying hierarchical structure is fixed**, restricting how researchers can investigate the impacts of changes to the hirerachy.
-Furthermore, the tasks considered by the authors do not include navigation subtasks (e.g., find water, look for a cow, wait for a plant to grow, go back to a table, etc.) or certain optional but useful subtasks (e.g., swords and the skill of dodging arrows contribute to making the task of killing skeletons easier), leading to abrupt drops in success rates in the hierarchy instead of a more gradual increase in difficulty.
-![Partial Hierarchical structure of the Crafter environment. Inspired from Figure 4 of [@hafner2022benchmarking]\label{fig:CrafterRequirements}](docs/images/CrafterRequirementsGraph.png){ width=80% }
+Moreover, the tasks considered by the authors do not encompass various navigation-related subtasks (such as finding water, locating a cow, waiting for a plant to grow, or returning to a table), nor do they include certain optional but beneficial subtasks (for example, using swords or the skill of dodging arrows can make the task of defeating skeletons easier).
+
+This omission results in abrupt drops in success rates within the hierarchy, rather than a more gradual progression in difficulty. This highlights that the hierarchy presented by the authors is incomplete, as it fails to capture the full range of subtasks in Crafter and the necessary or helpful interactions between them for successfully completing higher-level tasks.
+![Hierarchical structure of the Crafter environment as presented by the authors of Crafting with their success rates. Inspired from Figure 4 of [@hafner2022benchmarking]\label{fig:CrafterRequirements}](docs/images/CrafterRequirementsGraph.png){ width=80% }
 
 
 ### PDDLGym
@@ -117,7 +133,7 @@ Requirements graphs should be viewed as a generalization of previously observed
 ### 2. No feature extraction needed
 
 In contrast to benchmarks that yield grids, pixel arrays, text, or sound, HierarchyCraft directly provides a low-dimensional representation that does not require the further features extraction, as depicted in Figure \autoref{fig:HierarchyCraftState}.
-This not only saves computational time but also enables researchers to concentrate on hierarchical reasoning while additionally leveraging classical planning frameworks such as PDDL [@PDDL] or ANML [@ANML].
+This not only saves computational time but also enables researchers to concentrate on hierarchical reasoning, allow the use of classical planning frameworks such as PDDL [@PDDL] or ANML [@ANML], and enables the creation of any arbitrary complex custom environment from a list of *Transformation*, nothing more.
 ![HierarchyCraft state is already a compact representation.\label{fig:HierarchyCraftState}](docs/images/HierarchyCraftStateLarge.png){ width=80% }
 
 

From fbf88f1751cf8a42f1ac7631996397da675f4422 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Math=C3=AFs=20F=C3=A9d=C3=A9rico?= <mathfederico@gmail.com>
Date: Mon, 20 Jan 2025 19:37:50 +0100
Subject: [PATCH 9/9] =?UTF-8?q?=F0=9F=93=9D=20Update=20the=20paper?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paper.bib |   9 +++++
 paper.md  | 104 ++++++++++++++++++++++++++++++++----------------------
 2 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/paper.bib b/paper.bib
index d479dce9..602d1415 100644
--- a/paper.bib
+++ b/paper.bib
@@ -273,4 +273,13 @@ @inproceedings{nachum2018data
   year={2018}
 }
 
+@misc{Chollet2019OnTM,
+  title={On the Measure of Intelligence},
+  author={Chollet, Fran{\c{c}}ois},
+  year={2019},
+  eprint={1911.01547},
+  archiveprefix={arXiv},
+  url={https://arxiv.org/abs/1911.01547}
+}
+
 
diff --git a/paper.md b/paper.md
index 9ae4267e..6af02a66 100644
--- a/paper.md
+++ b/paper.md
@@ -39,36 +39,28 @@ bibliography: paper.bib
 
 # Summary
 
-Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option,@heess2016learning,@nachum2018data], yet despite the importance of understanding how the structure of an underlying hierarchy affects task difficulty, there is a lack of suitable environments or benchmarks to facilitate this exploration.
+Hierarchical reasoning poses a fundamental challenge in the field of artificial intelligence [@botvinick2014model]. Existing methods may struggle when confronted with hierarchical tasks [@bacon2017option; @heess2016learning; @nachum2018data], yet despite the importance of understanding how the structure of an underlying hierarchy affects task difficulty, there is a lack of suitable environments or benchmarks to facilitate this exploration.
 
-We introduce **HierarchyCraft** a software package that enables reasearchers to create custom environments based only on their hierarchical structures. This represents an important first step in developing tools for studying hierarchical reasoning and .
+We introduce **[HierarchyCraft](https://github.com/IRLL/HierarchyCraft)**, a software package that allows researchers to create custom environments based on their hierarchical structures, enabling the study of hierarchical reasoning.
 
-**HierarchyCraft** excludes unstructured data, such as pixel images, text, sound, or other data types typically processed using deep learning techniques, for two key reasons. First, this focus allows researchers to isolate and study hierarchical behavior without the complexity introduced by feature extraction, which is straightforward in this context compared to most reinforcement learning environments. The primary source of difficulty lies in the hierarchical structure of the actions to be performed. Second, by excluding these data types, HierarchyCraft ensures that the environments are well-suited for classical planning algorithms, enabling meaningful comparisons between hierarchical decision-making in classical planning and reinforcement learning.
+To isolate hierarchical behavior and ensure compatibility with classical planning algorithms, we exclude unstructured data like images and text, avoiding the complexity of feature extraction and allowing comparisons between classical planning and reinforcement learning.
 
-HierarchyCraft serves a dual purpose by offering a set of pre-defined hierarchical environments and simplifying the process of creating customized hierarchical environments.
+HierarchyCraft simplifies the creation of diverse hierarchical environments from a list of instances of [a single building block](https://irll.github.io/HierarchyCraft/hcraft/transformation), as showcased by the [set of pre-defined environments](https://irll.github.io/HierarchyCraft/hcraft/examples).
 
 
-![HierarchyCraft is at the intersection of Reinforcement learning, Planning, Hierarchical reasoning and Program synthesis.\label{fig:HierachyCraft_domain_position}](docs/images/HierachyCraft_domain_position.png){ width=75% }
+![HierarchyCraft is at the intersection of Reinforcement learning, Planning, Hierarchical reasoning and Program synthesis.\label{fig:HierachyCraft_domain_position}](docs/images/HierachyCraft_domain_position.png){ width=70% }
 
 
 # Statement of need
 
-HierarchyCraft is designed as a user-friendly Python library for constructing environments tailored to the study of hierarchical reasoning  in the contexts of reinforcement learning, classical planning, and program synthesis as displayed in \autoref{fig:HierachyCraft_domain_position}.
+HierarchyCraft is designed as a user-friendly Python library for constructing environments tailored to the study of hierarchical reasoning in the contexts of reinforcement learning, classical planning, and program synthesis as displayed in \autoref{fig:HierachyCraft_domain_position}.
 
 Analysis and quantification of the impacts of diverse hierarchical structures on learning agents is essential for advancing hierarchical reasoning.
 However, current hierarchical benchmarks often limit themselves to a single hierarchical structure per benchmark, and present challenges not only due to this inherent hierarchical structure but also because of the necessary representation learning to interpret the inputs.
 
-HierarchyCraft is not a benchmark itself, but a benchmark builder 
-designed to facilitate the study of how different hierarchical structures influence 
-the performance of classical planners and reinforcement learning algorithms. 
-It includes several 
-[examples](https://github.com/IRLL/HierarchyCraft/tree/master/src/hcraft/examples), 
-detailed in 
-[the package documentation](https://irll.github.io/HierarchyCraft/hcraft/examples),
-that can serve as initial benchmarks — those example includes basic parametrised hierarchical structure builders (Random, Recursive, Tower), and fixed one imitating other environments (MineHCraft imitating Minecraft tasks, MiniHCraft imitating Minigrid tasks).
-However, researchers are encouraged to create their own custom hierarchical environments to further explore hierarchical structures of their choice.
-
-We argue that arbitrary hierarchical complexity can emerge from simple rules without the need for learning a representation.
+HierarchyCraft is a benchmark builder (not a benchmark) designed to study how different hierarchical structures impact the performance of classical planners and reinforcement learning algorithms. It includes several [examples](https://github.com/IRLL/HierarchyCraft/tree/master/src/hcraft/examples), detailed in the [documentation](https://irll.github.io/HierarchyCraft/hcraft/examples), that can serve as initial benchmarks. These examples include basic parametrised hierarchical structures (Random, Recursive, Tower) and fixed structures imitating other environments (MineHCraft for Minecraft tasks, MiniHCraft for Minigrid tasks). Researchers are encouraged to create custom hierarchical environments to explore structures of their choice.
+
+We argue that arbitrary hierarchical complexity can emerge from simple rules.
 To the best of our knowledge, no general frameworks currently exist for constructing environments dedicated to studying the hierarchical structure itself. We next highlight five related benchmarks, underscoring the necessity for the development of a tool like HierarchyCraft.
 
 
@@ -76,44 +68,70 @@ To the best of our knowledge, no general frameworks currently exist for construc
 
 GridWorld, a general class of 2D grid-based environments, is frequently utilized in hierarchical reinforcement learning research, notably within the options framework [@sutton1999between].
 
-Minigrid [@minigrid] is a user-friendly Python library that not only implements a GridWorld engine but also expands its capabilities. This allows researchers to create more intricate scenarios by introducing additional rooms, objectives, or obstacles, as illustrated in  \autoref{fig:MinigridHierarchies}. Unfortunately, GridWorld environments typically exhibit a **limited hierarchical structure** and primarily focus on navigation tasks.
-
 ![Example of Minigrid environments hierarchical structures and their relationships. There is only a few possible sub-tasks and most of them are navigation tasks (in green).\label{fig:MinigridHierarchies}](docs/images/MinigridHierarchies.png){ width=100% }
 
 
+Minigrid [@minigrid] is a user-friendly Python library that not only implements a GridWorld engine but also expands its capabilities. This allows researchers to create more intricate scenarios by introducing additional rooms, objectives, or obstacles.
+
+As illustrated in \autoref{fig:MinigridHierarchies}, GridWorld environments only exhibit **limited and similar hierarchical structures** that primarily focus on navigation tasks, making these hierarchies mostly sequential.
+
 ### Minecraft
 
 An exemplary instance of a hierarchical task is the collection of diamonds in the popular video game Minecraft, as showcased in the MineRL competition [@guss2021minerl2020], where hierarchical reinforcement learning agents have dominated the leaderboard[@milani2020minerl2019].
+
 Due to sparse rewards, the difficulty of exploration, and long time horizons in this procedurally generated sandbox environment, DreamerV3 [@dreamerv3] recently became the first algorithm to successfully collect diamonds in Minecraft without prior training or knowledge.
-Unfortunately, DreamerV3 required training on an Nvidia V100 GPU for 17 days, gathering roughly 100 million environmental steps.
-Such **substantial computational resources are unavailable to many researchers**, impeding the overall progress of research on hierarchical reasoning.
-Moreover, although Minecraft has an undeniably complex hierarchical structure, **this underlying hierarchical structure is fixed** and cannot be modified without modding the game, a complex task for researchers.
+Unfortunately, DreamerV3 required training on an Nvidia V100 GPU for 17 days, gathering roughly 100 million environmental steps. Such **substantial computational resources are unavailable to many researchers**, impeding the overall progress of research on hierarchical reasoning.
+
+Moreover, although Minecraft has an [undeniably complex hierarchical structure](https://irll.github.io/HierarchyCraft/hcraft/examples/minecraft.html), **this underlying hierarchical structure is fixed** and cannot be modified without modding the game, a complex task for researchers.
 
 
 ### Crafter
 
-Crafter [@hafner2022benchmarking] presents a lightweight grid-based 2D environment, with game mechanics akin to Minecraft and poses similar challenges including exploration, representation learning, rewards sparsity and long-term reasoning.
-Although Crafter offers 22 different tasks displayed in \autoref{fig:CrafterRequirements}, the **underlying hierarchical structure is fixed**, restricting how researchers can investigate the impacts of changes to the hirerachy.
+Crafter [@hafner2022benchmarking] presents a lightweight grid-based 2D environment with game mechanics akin to Minecraft, and poses similar challenges, including exploration, representation learning, reward sparsity and long-term reasoning, at a much lower compute cost.
+
+![Hierarchical structure of the Crafter environment as presented by the authors of Crafter with their success rates. Inspired by Figure 4 of [@hafner2022benchmarking].\label{fig:CrafterGraph}](docs/images/CrafterRequirementsGraph.png){ width=80% }
+
+
+Although Crafter offers 22 different tasks displayed in \autoref{fig:CrafterGraph}, the **underlying hierarchical structure is fixed**, restricting how researchers can investigate the impacts of changes to the hierarchical structure.
+
 Moreover, the tasks considered by the authors do not encompass various navigation-related subtasks (such as finding water, locating a cow, waiting for a plant to grow, or returning to a table), nor do they include certain optional but beneficial subtasks (for example, using swords or the skill of dodging arrows can make the task of defeating skeletons easier).
 
 This omission results in abrupt drops in success rates within the hierarchy, rather than a more gradual progression in difficulty. This highlights that the hierarchy presented by the authors is incomplete, as it fails to capture the full range of subtasks in Crafter and the necessary or helpful interactions between them for successfully completing higher-level tasks.
-![Hierarchical structure of the Crafter environment as presented by the authors of Crafting with their success rates. Inspired from Figure 4 of [@hafner2022benchmarking]\label{fig:CrafterRequirements}](docs/images/CrafterRequirementsGraph.png){ width=80% }
+
+
+### Arcade Learning Environment (Atari)
+The arcade learning environment [@ALE] stands as a standard benchmark in reinforcement learning, encompassing over 55 Atari games. However, **only a few of these games, such as Montezuma’s Revenge and Pitfall, necessitate hierarchical reasoning**.
+
+Each Atari game has a **fixed hierarchy that cannot be modified**, and agents **demand substantial computational resources** to extract relevant features from pixels or memory, significantly slowing down experiments.
+
+<!-- 
+### NetHack Learning Environment
+
+The NetHack learning environment [@kuttler2020nethack] is an extremely hierarchical environment based on the game NetHack, where the observation is a grid composed of hundreds of possible symbols.
+
+In fact, NetHack is **too complex for agents to learn**, it requires many environment steps for agents to acquire domain-specific knowledge. 10B steps were required for the NeurIPS 2021 NetHack challenge [@2021NetHack], making it impractically long for a benchmark. Moreover, the NetHack game also has a **fixed underlying hierarchy** that cannot be easily modified. -->
 
 
 ### PDDLGym
 
-PDDLGym [@PDDLgym] is a Python library that automatically constructs Gym environments from Planning Domain Definition Language (PDDL) domains and problems. PDDL [@PDDL] functions as a problem specification language, facilitating the comparison of different symbolic planners. However, constructing PDDL domains and problems with a hierarchical structure is challenging and time-consuming, especially for researchers unfamiliar with PDDL-like languages.
+PDDLGym [@PDDLgym] is a Python library that automatically constructs Gym environments from Planning Domain Definition Language (PDDL) domains and problems. PDDL [@PDDL] functions as a problem specification language, facilitating the comparison of different symbolic planners.
+
+However, constructing PDDL domains and problems with a hierarchical structure is challenging and time-consuming, especially for researchers unfamiliar with PDDL-like languages.
+
 Additionally, PDDLGym is **compatible only with PDDL1** and does not support numeric-fluents introduced in PDDL 2.1 that are required to represent quantities in the inventories of HierarchyCraft environments.
 
 
-<!-- ### NetHack Learning Environment
+### Abstraction and Reasoning Corpus (ARC)
 
-The NetHack learning environment [@kuttler2020nethack] is based on the game NetHack, where the observation is a grid composed of hundreds of possible symbols.
-Large numbers of items are randomly placed in each level, making NetHack extremely complex and challenging. In fact, NetHack is **too complex for agents to learn**, it requires many environment steps for agents to acquire domain-specific knowledge. 10B steps were required for the NeurIPS 2021 NetHack challenge [@2021NetHack], making it impractically long for a benchmark. Moreover, the NetHack game also has a **fixed underlying hierarchy** that cannot be easily modified. -->
 
+The Abstraction and Reasoning Corpus (ARC) [@Chollet2019OnTM] is both hierarchical and diverse, as each task exhibits its own implicit hierarchical structure. However, these hierarchical structures are not explicitly provided within the dataset, for instance as short programs for each task. Making these hierarchical structures explicit would also contribute significantly to the development of hierarchical reasoning, much like what HierarchyCraft is trying to achieve.
+
+Much like Gridworld, ARC tasks require feature extraction from 2D grids, leveraging priors related to their spatial nature, which biases the tasks toward that specific data structure.
+
+Additionally, ARC tasks do not emphasize long-term reasoning, as they are relatively short compared to tasks within other benchmarks like Minecraft or even Gridworld. This makes the underlying hierarchical structure shallow for each task, and wider than deep for the whole corpus.
+
+Partitioning those underlying hierarchical structures and classifying them relative to the difficulty of finding a solution, independently of the solution's nature, is at the core of HierarchyCraft's motivation.
 
-### Arcade Learning Environment (Atari)
-The arcade learning environment [@ALE] stands as a standard benchmark in reinforcement learning, encompassing over 55 Atari games. However, **only a few of these games, such as Montezuma’s Revenge and Pitfall, necessitate hierarchical reasoning**. Each Atari games has a **fixed hierarchy that cannot be modified** and agents **demand substantial computational resources** to extract relevant features from pixels or memory, significantly slowing down experiments.
 
 ## Design goals
 
@@ -122,28 +140,28 @@ HierarchyCraft aims to be a fruitful tool for investigating hierarchical reasoni
 
 ### 1. Hierarchical by design
 
-The action space of HierarchyCraft environments consists of sub-tasks, referred to as *Transformations*, as opposed to detailed movements and controls. But each of *Transformations* has specific requirements to be valid (e.g. have enough of an item, be in the right place), and these requirements may necessitate the execution of other *Transformations* first, inherently creating a hierarchical structure in HierarchyCraft environments.
-This concept  is visually represented by the _Requirements graph_ depicting the hierarchical relationships within each HierarchyCraft environment.
-The _Requirements graph_ is directly constructed from the list of *Transformations* composing the environement, as illustrated in \autoref{fig:TransformationToRequirements}.
-Requirements graphs should be viewed as a generalization of previously observed graphical representations from related works, including  \autoref{fig:CrafterRequirements} and \autoref{fig:MinigridHierarchies}.
+The action space of HierarchyCraft environments consists of sub-tasks, referred to as *[Transformations](https://irll.github.io/HierarchyCraft/hcraft/transformation.html)*, as opposed to detailed movements and controls. However, each *Transformation* has specific requirements to be valid (e.g., having enough of an item, being in the right place), and these requirements may necessitate the execution of other *Transformations* first, inherently creating a hierarchical structure in HierarchyCraft environments.
+This concept is visually represented by the *Requirements graph* depicting the hierarchical relationships within each HierarchyCraft environment.
+The *Requirements graph* is directly constructed from the list of *Transformations* composing the environment, as illustrated in \autoref{fig:TransformationToRequirements}.
+Requirements graphs should be viewed as a generalization of previously observed graphical representations from related works, including \autoref{fig:CrafterGraph} and \autoref{fig:MinigridHierarchies}.
 
 ![How sub-tasks build a hierarchical structure.\label{fig:TransformationToRequirements}](docs/images/TransformationToRequirementsLarge.png){ width=75% }
 
 
-### 2. No feature extraction needed
-
-In contrast to benchmarks that yield grids, pixel arrays, text, or sound, HierarchyCraft directly provides a low-dimensional representation that does not require the further features extraction, as depicted in Figure \autoref{fig:HierarchyCraftState}.
-This not only saves computational time but also enables researchers to concentrate on hierarchical reasoning, allow the use of classical planning frameworks such as PDDL [@PDDL] or ANML [@ANML], and enables the creation of any arbitrary complex custom environment from a list of *Transformation*, nothing more.
-![HierarchyCraft state is already a compact representation.\label{fig:HierarchyCraftState}](docs/images/HierarchyCraftStateLarge.png){ width=80% }
-
-
-### 3. Easy to use and customize
+### 2. Easy to use and customize
 
 HierarchyCraft is a versatile framework enabling the creation of diverse hierarchical environments.
 The library is designed to be simple and flexible, allowing researchers to define their own hierarchical environments with detailed guidance provided in the documentation.
 To showcase the range of environments possible within HierarchyCraft, multiple examples are provided.
 
 
+### 3. No feature extraction needed
+In contrast to benchmarks that yield grids, pixel arrays, text, or sound, HierarchyCraft directly provides a low-dimensional representation that does not require further feature extraction, as depicted in \autoref{fig:HcraftState}.
+
+![HierarchyCraft compact state representation.\label{fig:HcraftState}](docs/images/HierarchyCraftStateLarge.png){ width=80% }
+
+This not only saves computational time but also enables researchers to concentrate on hierarchical reasoning, allows the use of classical planning frameworks such as PDDL [@PDDL] or ANML [@ANML], and enables the creation of arbitrarily complex custom environments from nothing more than a list of *Transformations*.
+
 ### 4. Compatible with multiple frameworks
 
 HierarchyCraft environments are directly compatible with both reinforcement learning through OpenAI Gym [@gym] and planning through the Unified Planning Framework [@UPF] (see \autoref{fig:HierarchyCraft-pipeline}).