From 5b3589a05626eb50fbbaca939b1e8eab6ffb6c47 Mon Sep 17 00:00:00 2001 From: Murphy Tian Date: Wed, 27 Aug 2025 20:28:59 +0000 Subject: [PATCH 1/4] update traj --- memory.md | 423 ++++++++++++++++++ openmanus_rl/environments/prompts/alfworld.py | 64 +-- test/alfworld_rollout.py | 96 ++-- 3 files changed, 504 insertions(+), 79 deletions(-) create mode 100644 memory.md diff --git a/memory.md b/memory.md new file mode 100644 index 00000000..34064713 --- /dev/null +++ b/memory.md @@ -0,0 +1,423 @@ + +[E:ep_192322|S:1] [Reflection] The last action was moving to cabinet 1. This action brought us closer to accomplishing the task as it positioned us to explore the cabinet for a potential mug. No issues were encountered, so the next logical step is to open the cabinet and examine its contents. + +[E:ep_192327|S:2] [Reflection] The last action opened cabinet 1, but it was empty. While it did not directly provide progress toward the task, it eliminated cabinet 1 as a potential location for the mug. This step narrows down the possible locations for the mug, bringing me closer to completing the task. + +[E:ep_192331|S:3] [Reflection] The last action was to move from cabinet 1 to cabinet 2. It successfully brought us to the next potential location that might contain the clean mug. While we did not accomplish the task directly, this step was necessary for systematically searching the room. We are making progress by eliminating locations one by one. + +[E:ep_192335|S:4] [Reflection] The last action was to open cabinet 2, and the observation revealed that it was empty. While this action did not yield the desired object, it successfully eliminated one of the potential storage locations for the mug, which is progress toward narrowing down the search. We are closer to completing the task by excluding unproductive areas. + +[E:ep_192340|S:5] [Reflection] The last action, "go to cabinet 3," successfully brought me to cabinet 3. Upon observing cabinet 3, I found a bowl but no mug, confirming that the search must continue. This step was productive in eliminating another location where the mug might have been. I am closer to completing the task as I systematically work through the possible locations. + +[E:ep_192343|S:6] [Reflection] The last action, "go to cabinet 4," successfully moved me to cabinet 4. This action was necessary as part of systematically searching cabinets for the mug. While no progress was made toward directly completing the task, it sets up the next step to check for the mug's presence in cabinet 4. + +[E:ep_192348|S:7] [Reflection] Last observation analysis: Opening cabinet 4 revealed a plate but no mug. This action did not directly bring me closer to completing the task but eliminated one possible location for the mug. It was successful in gathering information about potential storage areas. + +[E:ep_192355|S:8] [Reflection] Last observation analysis: +The last action of opening cabinet 5 revealed that it was empty. While this does not directly advance the task, it eliminates one potential location for the clean mug and allows me to focus on other areas. + +What did the last action accomplish? +It confirmed that cabinet 5 does not contain the target object. + +Are we closer to completing the task? +Yes, we are closer in the sense that we are narrowing down the potential locations for the clean mug. + +[E:ep_192358|S:9] [Reflection] The last action, "go to cabinet 6," successfully positioned me at cabinet 6. While I haven't yet identified a mug, I am systematically inspecting the cabinets, which is an effective strategy to locate the required item. This step brings me closer to identifying the mug's location. + +[E:ep_192412|S:1] [Reflection] The last action of going to cabinet 1 was successful in positioning me near a potential storage location for mugs. However, the cabinet is closed, and I still need to open it to check for a clean mug. Progress has been made in locating a potential source of mugs, but the task is not yet completed as the mug has not been identified or retrieved. + +[E:ep_192418|S:2] [Reflection] The last action (opening cabinet 1) successfully revealed that cabinet 1 is empty. While this did not directly move us closer to completing the task, it eliminated one possible location for the mug, refining our search strategy. We are making progress through systematic elimination of potential locations. + +[E:ep_192421|S:3] [Reflection] The previous action was going to cabinet 2. This action moved me closer to systematically checking cabinets for a clean mug. While cabinet 1 was devoid of items, checking each cabinet sequentially ensures thorough exploration. Opening cabinet 2 is the logical next step to continue this search. + +[E:ep_192427|S:4] [Reflection] Last observation analysis: Have we made progress toward solving the task? +Yes, we are systematically eliminating cabinets as potential locations for the mug. This helps narrow down the search. + +What did the last action accomplish? Was it successful or did it encounter any issues? +The last action successfully opened Cabinet 2 and confirmed that it was empty. No issues were encountered. + +Are we closer to completing the task? +While we have not yet found the mug, the systematic search reduces the number of locations to check, bringing us closer to finding the object. + +[E:ep_192430|S:5] [Reflection] The last action, "go to cabinet 3," successfully positioned me in front of cabinet 3, where I observed a bowl but no mug. This confirms progress in systematically searching the cabinets and eliminating potential locations for the mug. Although the desired object was not found, the action did not encounter any issues. + +[E:ep_192435|S:6] [Reflection] Last observation analysis: My previous action was to go to cabinet 4. This action successfully brought me to the cabinet, and now I am positioned to interact with it. Progress is steady as I systematically search for the clean mug. + +What did the last action accomplish? It positioned me near cabinet 4, which is a potential storage location for the mug. + +Are we closer to completing the task? Yes, as I am actively investigating potential storage locations to locate the mug. + +[E:ep_192439|S:7] [Reflection] The last action involved opening cabinet 4, which revealed a plate. While this did not directly advance the task, it successfully eliminated cabinet 4 as a possible location for a mug. This systematic process ensures I thoroughly search each storage area for the required item. Progress is made by narrowing down potential locations and focusing on unexplored areas. + +[E:ep_192444|S:8] [Reflection] The last action, "go to cabinet 5," did not yield any immediate progress, as nothing happened. However, it positioned me to interact with cabinet 5, which is a logical next place to search for the mug. The task remains incomplete, but I am systematically narrowing down potential locations for the mug. + +[E:ep_192449|S:9] [Reflection] The last action, "open cabinet 5," was successful in revealing its contents, but it did not contribute directly to the task since the cabinet was empty. This confirms that I need to continue searching other storage locations systematically. I am not yet closer to completing the task but have eliminated cabinet 5 as a potential source of the mug. + +[E:ep_192501|S:1] [Reflection] Last observation analysis: The last action successfully moved me to cabinet 1, which is a likely storage area for mugs. The cabinet is currently closed, so I need to open it to check its contents. This is a step forward in locating the mug. + +[E:ep_192504|S:2] [Reflection] The last action "open cabinet 1" revealed that cabinet 1 is empty, meaning it did not contain the mug we need. While this action did not directly advance the task, it ruled out one location, narrowing down the possible places where the mug might be found. This is progress in systematically searching for the target object. + +[E:ep_192508|S:3] [Reflection] The last action ("go to cabinet 2") successfully brought me to cabinet 2. While it did not directly accomplish the goal of finding the mug, it was an essential step for systematically searching the cabinets. This action aligns with the plan to explore cabinets for the mug. + +[E:ep_192512|S:4] [Reflection] The last action, "open cabinet 2," was successful in revealing that cabinet 2 is empty. While it did not directly result in progress toward finding the mug, it eliminated one possible location, narrowing down the search. + +[E:ep_192522|S:5] [Reflection] In the last action, I moved to Cabinet 3. This action allowed me to inspect its contents, revealing that it contains a bowl but not a mug. While this does not directly progress toward the goal of finding a mug, it eliminates one potential location and narrows down the search. Progress has been made in systematically exploring the environment. + +[E:ep_192531|S:6] [Reflection] Last observation analysis: The last action was "go to cabinet 4," and it successfully brought me to the location of cabinet 4. The cabinet is currently closed, so I cannot see its contents yet. Progress has been made by systematically moving through potential storage areas to locate a mug. + +What did the last action accomplish? It allowed me to reach cabinet 4, setting up the next logical step of opening it to inspect its contents. + +Are we closer to completing the task? Yes, we are systematically searching for the mug, and this action prepares us to explore the contents of cabinet 4. + +[E:ep_192538|S:7] [Reflection] The last action of opening cabinet 4 allowed me to inspect its contents, revealing a plate but not a mug. This confirms that I need to continue exploring other cabinets or storage locations to find the required mug. The action was successful in narrowing down the possible locations where a mug might be found, but it did not directly bring me closer to completing the task as no mug was found. + +[E:ep_192543|S:8] [Reflection] The last action, "go to cabinet 5," successfully moved me to the next cabinet in my systematic search. However, no further progress was made as I have not yet inspected or interacted with this cabinet. The action was successful in positioning me for the next step, but no new information or items were acquired yet. + +[E:ep_192547|S:9] [Reflection] The last action (opening cabinet 5) was successful in revealing its contents, but cabinet 5 did not contain any items. While the action itself executed properly, it did not directly advance the task since no mug was found. However, eliminating cabinet 5 from the search narrows down the potential locations to find a mug. + +[E:ep_192558|S:1] [Reflection] The last action was "go to cabinet 1." This action successfully moved me closer to cabinet 1, which is now within reach. The action was a necessary preliminary step in exploring potential storage locations for the mug. I am now positioned to interact with the cabinet, moving forward in the task. + +[E:ep_192602|S:2] [Reflection] The last action, "open cabinet 1," was successful in determining that cabinet 1 does not contain any items, including a mug. This action helped eliminate cabinet 1 as a potential location for the mug. While no progress was made in finding the mug, the search process is narrowing the possibilities. We are methodically moving closer to completing the task. + +[E:ep_192607|S:3] [Reflection] Last observation analysis: The last action ("go to cabinet 2") successfully moved me to cabinet 2, where I now have the option to open it and examine its contents. +What did the last action accomplish? It positioned me at cabinet 2, enabling me to investigate it further. +Are we closer to completing the task? Yes, as I am systematically searching for the mug needed to place in the coffeemachine. + +[E:ep_192613|S:4] [Reflection] The last action was to open cabinet 2. It was successful, but the cabinet was found to be empty. While this did not directly bring us closer to finding the clean mug, it helped narrow down the potential locations where the mug could be stored. Progress is still being made in systematically searching the environment. + +[E:ep_192618|S:5] [Reflection] The last action, "go to cabinet 3," successfully moved me to the cabinet, and the observation revealed that this cabinet contains a bowl but not a mug. While the action did not directly bring me closer to the task goal, it eliminated one more location from the search process, narrowing down where the clean mug might be. + +[E:ep_192621|S:6] [Reflection] The last action was "go to cabinet 4," and it successfully brought me to the location of cabinet 4. This action was successful and aligns with the plan to systematically search cabinets for a clean mug. It has brought me closer to finding the target object necessary for completing the task. + +[E:ep_192627|S:7] [Reflection] Last observation analysis: +The last action of opening cabinet 4 revealed a plate inside, but not the target item (a clean mug). This indicates that my search needs to continue. + +What did the last action accomplish? +It confirmed that cabinet 4 does not contain the mug, narrowing down possible locations for the mug. + +Are we closer to completing the task? +Yes, by eliminating cabinet 4 as a possible location for the mug, I am making progress toward finding the item needed to complete the task. + +[E:ep_192630|S:8] [Reflection] The last action involved moving to cabinet 5, but the observation "Nothing happens" indicates that I have arrived at cabinet 5, and I haven't interacted with it yet. Progress has been made in systematically searching for the mug, but I haven’t identified the target object yet. The task remains incomplete. + +[E:ep_192635|S:9] [Reflection] The last action was "open cabinet 5," and its observation revealed that cabinet 5 is empty. While this does not directly progress toward finding the mug, it narrows down the possible locations for the mug and eliminates cabinet 5 as a potential source. This systematic exploration is necessary for completing the task. + +[E:ep_192647|S:1] [Reflection] The last action was "go to cabinet 1," which successfully moved me to the location of cabinet 1. This was a necessary step to start searching for the clean mug. Progress has been made as I am now positioned to interact with the cabinet and examine its contents. + +[E:ep_192651|S:2] [Reflection] Last observation analysis: Opening cabinet 1 revealed that it is empty. This action confirmed that cabinet 1 does not contain the mug, which helps narrow down potential locations for the mug. Progress was made in eliminating one possible location. +What did the last action accomplish? It successfully opened cabinet 1 and confirmed its contents (or lack thereof). +Are we closer to completing the task? Yes, narrowing down potential locations brings us closer to finding the mug and completing the task. + +[E:ep_192655|S:3] [Reflection] The last action was "go to cabinet 2," which successfully moved me to this location. This is progress since finding a clean mug is the first step toward completing the task. However, the cabinet is closed, so I have not yet determined its contents. + +[E:ep_192659|S:4] [Reflection] The last action was "open cabinet 2," and the observation revealed that cabinet 2 was empty. This indicates that our search for the mug must continue. While the last action did not directly advance the task, it was necessary to rule out cabinet 2 as a storage location for the mug. We are still progressing systematically toward finding the mug. + +[E:ep_192703|S:5] [Reflection] The last action, "go to cabinet 3," allowed me to inspect its contents. Although cabinet 3 contains a bowl, it does not contain the required mug, meaning progress toward the task has not yet been made. However, systematically checking the cabinets is essential for locating the mug, and the process is progressing as planned. + +[E:ep_192706|S:6] [Reflection] The last action involved moving to cabinet 4. This action successfully positioned us to interact with cabinet 4, which is currently closed. We did not yet make progress toward identifying or obtaining a clean mug but are well-positioned to explore this cabinet next. + +[E:ep_192711|S:7] [Reflection] Last observation analysis: +The last action was to open cabinet 4, revealing its contents (a plate 2). This was successful but did not bring us closer to finding the mug. It helped eliminate one storage location as a possible source of the mug. + +What did the last action accomplish? +The last action helped us confirm that cabinet 4 does not contain the mug, allowing us to redirect our search to other locations. + +Are we closer to completing the task? +Not significantly closer, but the search is progressing systematically toward locating the mug. + +[E:ep_192715|S:8] [Reflection] The last action ("go to cabinet 5") did not yield any progress; the observation ("Nothing happens") indicates that cabinet 5 does not contain the target object. While exploring cabinets is logical, the approach may need adjustment to prioritize areas more likely to hold mugs, such as drawers or countertops. + +[E:ep_192720|S:9] [Reflection] The last action was "go to drawer 1," which resulted in arriving at the drawer. The drawer is currently closed, so no progress has been made toward finding the mug yet. However, this action has positioned me to interact with drawer 1, which is the next logical step in the search for the mug. + +[E:ep_192733|S:1] [Reflection] The last action was "go to cabinet 1," which successfully brought me to the location of cabinet 1. This action was necessary and productive as it positioned me at a potential source for the clean mug. Progress has been made as I have moved closer to identifying the location of the mug. + +[E:ep_192737|S:2] [Reflection] The last action (opening cabinet 1) was successful in verifying its contents, but it did not yield the desired item (a clean mug). This was expected progress in systematically searching locations. While we are not yet closer to completing the task, we are narrowing down possible locations for the mug. + +[E:ep_192740|S:3] [Reflection] The last action involved moving to cabinet 2 after determining that cabinet 1 was empty. This action was successful in positioning me at a new location to continue the search for the mug. While progress was made in checking another possible storage location, I have not yet located the mug. This action brought me closer to solving the task by systematically exploring storage areas. + +[E:ep_192744|S:4] [Reflection] The last action, opening cabinet 2, revealed that it is empty. While it did not directly progress toward finding the clean mug, it eliminated one possible location, narrowing down the search. We are still on track toward completing the task but need to keep searching for the mug. + +[E:ep_192747|S:5] [Reflection] The last action successfully brought me to cabinet 3, where I discovered a bowl. While this does not directly contribute to solving the task, it rules out cabinet 3 as a location for the mug, narrowing down the search space. + +[E:ep_192751|S:6] [Reflection] Last observation analysis: The last action was "go to cabinet 4," which successfully positioned me at cabinet 4, a new location. This was a logical step in systematically exploring potential storage areas for the mug. While progress was made in terms of moving to a new cabinet, I have not yet located the mug. + +What did the last action accomplish? It moved me closer to potentially finding the mug. + +Are we closer to completing the task? Not yet, but the exploration process is ongoing, and opening cabinet 4 might yield the required item. + +[E:ep_192755|S:7] [Reflection] The last action was opening cabinet 4. While it revealed a plate, it did not help us locate the mug, which is our target object. The action confirmed that cabinet 4 does not contain a mug, allowing us to rule it out and focus on other containers in the room. We are still searching for the mug, but the systematic approach ensures progress toward completing the task. + +[E:ep_192759|S:8] [Reflection] Last observation analysis: The last action ("go to cabinet 5") resulted in "Nothing happens," indicating I have navigated to cabinet 5 but not interacted with it yet. +What did the last action accomplish? It positioned me near cabinet 5, making it accessible for interaction. +Are we closer to completing the task? Yes, as I am systematically exploring potential locations for the mug. + +[E:ep_192802|S:9] [Reflection] The last action, "open cabinet 5," revealed that cabinet 5 is empty. While this did not directly advance the task, it eliminated one possible location for the mug. This systematic approach helps ensure that I check all possible storage locations methodically. + +[E:ep_192814|S:1] [Reflection] The last action was to "go to cabinet 1," which successfully positioned me at cabinet 1. This is progress because I am now at a location that could potentially store the mug I need. With cabinet 1 closed, the next logical step is to open it and examine its contents. + +[E:ep_192821|S:2] [Reflection] Last observation analysis: Upon opening cabinet 1, I discovered that it was empty and did not contain the target object (a mug). This indicates that the mug is not in this location, and further search is required. + +What did the last action accomplish? The last action (opening cabinet 1) successfully revealed the cabinet's contents, confirming that the mug is not here. + +Are we closer to completing the task? While the last action did not bring us directly closer to the task, it eliminated one possibility, helping to narrow down the search. + +[E:ep_192827|S:3] [Reflection] Last observation analysis: The last action was to go to cabinet 2. As a result, I am now at cabinet 2, which is closed. This action successfully brought me to the next storage location to investigate for the clean mug. + +Progress toward the task: By systematically moving to cabinet 2, I am continuing to narrow down potential locations for the clean mug. This aligns with the plan to thoroughly search for the item. + +[E:ep_192830|S:4] [Reflection] The last action involved opening cabinet 2 after arriving at it. The observation revealed that cabinet 2 was empty, meaning no progress was made in terms of locating the mug. However, this action systematically ruled out cabinet 2 as a potential location for the mug, narrowing the options for where the mug could be located. While this did not directly move us closer to completing the task, it was necessary to eliminate possibilities. + +[E:ep_192835|S:5] [Reflection] Last observation analysis: +The last action, "go to cabinet 3," successfully brought me to cabinet 3, but the observation revealed only a bowl and no mug. While it did not directly advance the task, it confirmed that cabinet 3 does not contain a clean mug, allowing me to eliminate it from consideration. + +Progress: +We are methodically searching the environment, which is a necessary step to locate the clean mug. + +[E:ep_192839|S:6] [Reflection] Last observation analysis: +The last action was "go to cabinet 4." This action successfully brought me to the next cabinet for investigation. It was a necessary step in systematically searching cabinets to locate the mug. + +Progress toward task: +I have systematically worked through several cabinets. Although the mug has not yet been found, the exploration strategy is sound, and I am closer to completing the task. + +[E:ep_192842|S:7] [Reflection] The last action was to open cabinet 4, revealing its contents to be a plate 2. While this did not yield a clean mug, it successfully ruled out cabinet 4 as a potential location for the mug. This systematic approach ensures progress toward locating the required object. + +[E:ep_192845|S:8] [Reflection] The last action opened Cabinet 5, which revealed that it was empty. This action did not directly advance the task, as no useful object was discovered. However, it narrowed down the list of potential locations for the mug, which is still progress toward completing the objective. + +[E:ep_192849|S:9] [Reflection] The last action was "go to cabinet 6," and it successfully positioned me at cabinet 6, enabling me to inspect it next. Progress is being made, as I am systematically searching potential storage locations for the clean mug. While I have not yet found the mug, I am narrowing down the possibilities. + +[E:ep_192901|S:1] [Reflection] The last action was "go to cabinet 1," which successfully brought me to the cabinet. It was a logical first step in narrowing down the location of a clean mug. This action moved me closer to completing the task by positioning me at a likely storage location for mugs. There were no issues encountered during this step. + +[E:ep_192906|S:2] [Reflection] The last action was opening cabinet 1, which revealed that it was empty. This action was successful in confirming that cabinet 1 does not contain a mug, but it did not directly advance the task of finding a mug. Progress has been made by eliminating one possible storage location, and we are closer to completing the task as we systematically search other cabinets. + +[E:ep_192909|S:3] [Reflection] The last action was "go to cabinet 2". This action brought me closer to systematically searching the cabinets for a clean mug. While it did not directly accomplish the task, it was successful in moving towards the next logical step in item discovery. I am now positioned to interact with cabinet 2. + +[E:ep_192914|S:4] [Reflection] The last action, "open cabinet 2," successfully revealed that cabinet 2 is empty. While it did not yield the mug, it eliminated this location from the search space, which is progress. We are systematically narrowing down where the mug might be. This action aligns with the goal of efficiently locating the necessary item. + +[E:ep_192918|S:5] [Reflection] The previous action, "go to cabinet 3," successfully brought me to cabinet 3 and allowed me to observe its contents. However, it did not contain the mug I am searching for. This observation helps narrow down where the mug might be located. Progress toward the task is being made as I continue the search systematically. + +[E:ep_192924|S:6] [Reflection] Last observation analysis: The last action was "go to cabinet 4," which successfully moved me to this cabinet. However, no progress has been made yet toward finding a mug. The previous actions (e.g., opening cabinets 2 and 3) revealed that they did not contain the target item. Thus, I need to continue searching systematically. + +[E:ep_192928|S:7] [Reflection] The last action was successful in opening cabinet 4, but it did not yield the desired object (a clean mug). We are making steady progress in systematically searching the environment for the mug. Although we haven't found the mug yet, each cabinet we check narrows down the possible locations. + +[E:ep_192933|S:8] [Reflection] The last action was to go to cabinet 5, but it did not reveal any new useful information because no interaction occurred. While I am systematically exploring the environment, opening the cabinet is necessary to check if it contains the target object (a mug). + +[E:ep_192937|S:9] [Reflection] The last action was to open cabinet 5, and the observation revealed that it contains nothing. This action was not directly helpful in finding the mug, but it eliminated cabinet 5 as a possible location for the mug, narrowing down the search area. We are still progressing systematically toward completing the task. + +[E:ep_192950|S:1] [Reflection] Last observation analysis: The last action, "go to cabinet 1," successfully moved me to cabinet 1, bringing me closer to potentially finding a mug. Progress has been made toward solving the task by narrowing down the search location. +What did the last action accomplish? It positioned me at cabinet 1, allowing me to interact with it and investigate its contents. +Are we closer to completing the task? Yes, I am now at a specific location where mugs may be stored. + +[E:ep_192954|S:2] [Reflection] The last action I took was to open cabinet 1, which revealed that the cabinet was empty and did not contain a clean mug. This action confirmed that cabinet 1 is not useful for the task and redirected my search to other locations. While there was no immediate progress toward completing the task, it eliminated one possibility and helped narrow down the search. + +[E:ep_193000|S:3] [Reflection] The last action was "go to cabinet 2," which successfully brought me to the location of cabinet 2. This action allowed me to continue my search for a clean mug by positioning me at a potential storage location. The progress so far has been methodical, as I have been systematically checking cabinets for the desired item. While no clean mug has been found yet, the search is ongoing and aligned with the task's requirements. + +[E:ep_193003|S:4] [Reflection] The last action, "open cabinet 2," revealed that cabinet 2 is empty. While this did not directly progress the task, it eliminated one potential location where the clean mug could be stored. This systematic search strategy keeps the process organized and thorough, ensuring we don't miss the mug. + +[E:ep_193009|S:5] [Reflection] Last observation analysis: The last action, "go to cabinet 3," successfully placed me at cabinet 3. However, upon observation, cabinet 3 contains a bowl, which is not useful for my task of finding a clean mug. This suggests that I need to continue searching other locations for the mug. +Progress evaluation: While I have not yet found the mug, I am systematically narrowing down potential storage locations, which is essential for completing the task. + +[E:ep_193013|S:6] [Reflection] The last action was "go to cabinet 4," which brought me to this location. The observation reveals that cabinet 4 is closed, and I need to interact with it to check its contents. The action successfully advanced my exploration of potential mug storage locations. + +[E:ep_193018|S:7] [Reflection] Last observation analysis: Opening cabinet 4 revealed a plate but did not reveal any mug. This indicates that the mug is likely stored elsewhere, and my systematic search must continue. +What did the last action accomplish? It successfully opened the cabinet, allowing me to examine its contents. +Are we closer to completing the task? Yes, as we are eliminating potential storage locations for the mug and narrowing down where it might be stored. + +[E:ep_193022|S:8] [Reflection] The last action to "go to cabinet 5" did not yield any results or progress based on the observation "Nothing happens." This suggests that I need to interact with cabinet 5 to potentially find relevant items. Although the action did not directly bring me closer to completing the task, it positioned me correctly for the next step. + +[E:ep_193027|S:9] [Reflection] The last action involved opening cabinet 5 to inspect its contents. The cabinet was found to be empty, meaning no progress was made in finding the mug. However, this step helps narrow down potential locations for the mug, as I now know it is not in cabinet 5. The search is still ongoing, and I am systematically eliminating options. + +[E:ep_193038|S:1] [Reflection] The last action was "go to cabinet 1," which successfully positioned me near cabinet 1. This was an essential step toward finding the clean mug, as cabinets often store kitchenware. Progress has been made, as I am now ready to interact with cabinet 1 to explore its contents. + +[E:ep_193050|S:2] [Reflection] The last action, "open cabinet 1," successfully revealed that cabinet 1 is empty. While it did not directly help in finding the clean mug, it eliminated one possible location, narrowing down the search. This is a small but essential step toward completing the task, as it ensures we focus on other areas where the mug might be stored. + +[E:ep_193054|S:3] [Reflection] The last action was "go to cabinet 2," which successfully positioned me in front of the cabinet. This action was a step forward as it allowed me to focus on a specific area where a mug might be stored. However, the cabinet is currently closed, so further actions are needed to examine its contents. Progress has been made in narrowing down the search area, but the mug has not yet been located. + +[E:ep_193058|S:4] [Reflection] The last observation revealed that Cabinet 2 is open and contains no items. This means the search for the mug must continue. The last action (opening Cabinet 2) was successful but did not yield the desired item. We are progressing methodically by eliminating possible storage locations for the mug. + +[E:ep_193103|S:5] [Reflection] The last action was moving to cabinet 3. The result of this action revealed a bowl, which is not the item needed for the task. While this action did not directly progress the task, it contributed to systematically eliminating potential storage locations for the mug. This step was successful in that it confirmed cabinet 3 does not contain the target item. + +[E:ep_193109|S:6] [Reflection] The last action, "go to cabinet 4," successfully brought me to the next cabinet in my systematic search. This action sets up the opportunity to check this cabinet for the clean mug. Although progress toward the ultimate goal was not achieved yet (since no mug was found in the previous cabinets), I am continuing to explore methodically, which is necessary for completing the task. + +[E:ep_193112|S:7] [Reflection] Last observation analysis: Opening cabinet 4 revealed a plate, but not the desired mug. Progress toward solving the task was made in terms of eliminating one location where the mug might be stored. The action was successful in terms of accessing the cabinet's contents, but it did not yield the desired item. We are closer to completing the task by narrowing down the search. + +[E:ep_193116|S:8] [Reflection] The last action of going to cabinet 5 did not yield a useful result, as the observation was "Nothing happens." While this step did not advance my progress directly, it confirmed that cabinet 5 might need examination to verify its contents. Thus, I am not yet closer to completing the task, but I am maintaining systematic progress by inspecting potential storage locations for the mug. + +[E:ep_193120|S:9] [Reflection] The last action was "examine cabinet 5," which confirmed that cabinet 5 is closed but did not yield new information or progress toward the task. Opening cabinet 5 is necessary to investigate its contents and potentially find a mug. This action will help determine whether cabinet 5 contains a relevant item for the task. + +[E:ep_200300|S:1] [Reflection] - My last action successfully brought me to cabinet 1, advancing my plan to locate a clean mug. +- However, since the cabinet is closed, I cannot yet determine if it contains a mug or any other useful items. +- Opening the cabinet is the next logical step to continue the search. + +[E:ep_200304|S:2] [Reflection] - The last action of opening cabinet 1 did not yield a clean mug, but it was a necessary step in the process of systematically searching through potential storage locations. +- The outcome reinforces the need to continue searching other cabinets or drawers in the room. +- I am still progressing toward the task goal by eliminating non-viable storage locations. + +[E:ep_200308|S:3] [Reflection] - My last action successfully brought me to cabinet 2, advancing my plan to locate a clean mug. +- Since the cabinet is closed, I cannot yet determine if it holds any useful items. Opening it is the next logical step. +- I am systematically eliminating potential storage locations, which is an efficient approach to the task. + +[E:ep_200311|S:4] [Reflection] - Opening cabinet 2 did not yield a clean mug, but this action helped eliminate another potential storage location. +- While no progress was made directly toward finding the mug, systematically checking cabinets ensures I am methodically searching the environment. +- I need to continue with the plan of searching other cabinets or drawers until I find the mug. + +[E:ep_200315|S:5] [Reflection] - My last action successfully brought me to cabinet 3, advancing the plan to systematically search all potential storage locations for a clean mug. +- Although cabinet 3 has a bowl visible, it does not contain the object required to complete my task (a mug). I need to continue searching other cabinets or storage locations. +- The systematic search approach is still valid and necessary, as I have not yet found the required mug. + +[E:ep_200319|S:6] [Reflection] - My last action successfully brought me to cabinet 4, advancing the plan to locate a clean mug. +- Since cabinet 4 is currently closed, I need to open it to inspect its contents and determine if it contains a mug. +- The systematic search approach remains valid, as it ensures thorough exploration of all potential storage locations. + +[E:ep_200322|S:7] [Reflection] - Opening cabinet 4 has ruled it out as a storage location for the clean mug since it only contains a plate. +- The systematic approach is still valid, as it ensures I thoroughly explore potential storage locations. +- I need to continue the search by moving to the next cabinet or storage location. + +[E:ep_200326|S:8] [Reflection] - My attempt to move to cabinet 5 did not result in any progress. This indicates that I must explicitly open cabinet 5 to inspect its contents. +- The systematic approach of moving through and inspecting each cabinet remains valid and effective, though further adjustments may be necessary to ensure progress. + +[E:ep_200332|S:9] [Reflection] - Opening cabinet 5 confirmed it is not a viable location for the clean mug. This further narrows down the potential locations for the item. +- The systematic approach remains effective, as it ensures I methodically check all potential storage areas in the environment. +- Progress is being made toward completing the task, albeit slowly, as I continue to eliminate empty locations. + +[E:ep_200345|S:1] [Reflection] - My last action successfully positioned me at cabinet 1, which is a good starting point for the task. However, the cabinet is closed, and I need to open it to search for a clean mug. +- The current outcome aligns with my plan to methodically search storage areas until I find the mug. I am making progress toward the task goal. +- Opening the cabinet will reveal its contents, allowing me to determine if the clean mug is inside. + +[E:ep_200348|S:2] [Reflection] - My action to open cabinet 1 was successful in revealing its contents, but it did not advance my task since the cabinet is empty. +- This outcome indicates that I need to explore other potential storage locations systematically until I find the mug. +- While this step did not directly contribute to completing the task, it eliminates cabinet 1 as a possible location, narrowing down my search area. + +[E:ep_200354|S:3] [Reflection] - My last action successfully brought me to cabinet 2, the next logical place to search for a mug. +- Although I have not yet made direct progress toward finding the mug, this systematic search strategy is efficient and ensures thoroughness. +- The outcome of this step confirms my focus on systematically checking cabinets to locate the required item. + +[E:ep_200356|S:4] [Reflection] - My action to open cabinet 2 successfully revealed its contents, but it did not help me find the clean mug as the cabinet was empty. +- This outcome reinforces the need to continue searching other storage locations systematically. The plan is still progressing, as I am narrowing down possible locations for the mug. +- I need to remain consistent in my approach to ensure that no potential storage location is overlooked. + +[E:ep_200402|S:5] [Reflection] - My action to move to cabinet 3 brought me to the next logical storage location in my systematic search. This step has allowed me to observe the presence of a bowl in this cabinet. +- While the bowl is not the item I am looking for, observing it confirms that there might still be other items inside or around cabinet 3 that need closer inspection. +- I am making progress by systematically eliminating cabinets and ensuring that no potential storage location is missed. + +[E:ep_200406|S:6] [Reflection] - My action of examining cabinet 3 has reconfirmed that the cabinet only holds a bowl, and no clean mug is present here. +- This observation means that cabinet 3 can also be eliminated as a potential location for the clean mug. +- While I did not find the mug, I am making progress by methodically narrowing down the search area. + +[E:ep_200409|S:7] [Reflection] - My action to move to cabinet 4 was successful in positioning me at the next logical storage location. However, since the cabinet is closed, I need to open it to determine whether it contains the clean mug. +- Although the mug has not yet been found, my systematic approach is ensuring that every potential storage location is thoroughly examined. +- Opening cabinet 4 will allow me to confirm whether the mug is present or whether I need to continue searching elsewhere. + +[E:ep_200411|S:8] [Reflection] - My action to open cabinet 4 successfully revealed its contents, but it did not contain the clean mug. Instead, it contains a plate (plate 2). +- While I have not yet located the mug, I am continuing to narrow down possible locations through my systematic search strategy. +- With cabinet 4 eliminated as a potential storage location for the mug, I need to move on to the next cabinet. + +[E:ep_200417|S:9] [Reflection] - My action to open cabinet 5 was successful in determining that it is empty. This eliminates another potential location for the mug. +- While I have not found the mug, my systematic approach is ensuring that all potential locations are thoroughly checked. +- I am making progress by narrowing down the list of possible storage areas. + +[E:ep_200428|S:1] [Reflection] - My last action successfully brought me to drawer 1, advancing my plan to search for a clean mug. +- The drawer is closed, and I cannot see its contents yet. To proceed, I need to open the drawer and inspect its contents. +- This action aligns with my goal of locating a clean mug, as drawers are a potential storage location for such items. + +[E:ep_200431|S:2] [Reflection] - My last action (opening drawer 1) was successful but did not provide the desired outcome, as the drawer was empty. +- This outcome indicates that drawer 1 is not a viable source for the clean mug, and I need to redirect my search to other locations. +- I am still making progress toward the task goal by systematically eliminating potential storage locations. + +[E:ep_200435|S:3] [Reflection] - My last action successfully brought me to cabinet 1, advancing my search for the clean mug. +- Since the cabinet is closed, I cannot yet access or see its contents. Therefore, the next step in my search process is to open cabinet 1. +- This action aligns with my overall plan to systematically search storage areas for the mug. + +[E:ep_200438|S:4] [Reflection] - My last action (opening cabinet 1) was successful, as the cabinet is now open and inspected. However, cabinet 1 was empty, so I need to redirect my search. +- This outcome reinforces the importance of continuing a systematic search of other areas to locate the clean mug. +- I am still making progress toward the task by eliminating potential storage locations step by step. + +[E:ep_200442|S:5] [Reflection] - My last action successfully brought me to cabinet 2, progressing my systematic search for the clean mug. +- Since cabinet 2 is closed, I cannot yet determine whether it contains the mug. The next step is to open it and inspect its contents. +- So far, my systematic approach has been effective, even though I haven’t yet found the mug. This method ensures thorough exploration of the environment. + +[E:ep_200446|S:6] [Reflection] - My last action (opening cabinet 2) was successful in that it allowed me to inspect the cabinet's contents. However, cabinet 2 was empty, meaning I need to continue searching other locations. +- This outcome reinforces the need to systematically eliminate potential storage areas to find the clean mug. +- I am still making progress by narrowing down the possible locations for the mug. + +[E:ep_200450|S:7] [Reflection] - My last action (moving to cabinet 3) was partially successful, as I discovered an object (bowl 1), but it is not the item I am searching for. +- The presence of the bowl suggests that cabinets may contain kitchenware, but not all cabinets will necessarily have the clean mug. +- This outcome reinforces the need to continue my systematic search of other cabinets or storage areas. + +[E:ep_200453|S:8] [Reflection] - My last action (moving to cabinet 4) was successful in that it brought me to the next potential storage area for the clean mug. +- Since cabinet 4 is closed, I cannot yet determine whether it contains the mug. The next logical step is to open it and inspect its contents. +- My systematic approach is progressing well, as I am narrowing down potential locations for the mug. + +[E:ep_200459|S:9] [Reflection] - My last action (opening cabinet 4) was successful in that it allowed me to inspect the cabinet's contents. However, cabinet 4 contained a plate (plate 2), not the clean mug. +- This reinforces the need to continue my systematic search of cabinets and other potential storage areas. +- I am still making progress by systematically eliminating potential locations for the mug. + +[E:ep_200512|S:1] [Reflection] - My last action successfully brought me to cabinet 1, which is a logical step toward finding the mug. +- Since the cabinet is closed, my immediate task is to open it to check if it contains a mug. +- This outcome aligns with my plan and moves me closer to completing the task. + +[E:ep_200516|S:2] [Reflection] - The action of opening cabinet 1 was necessary to investigate its contents, but it did not yield the desired result as the cabinet was empty. +- This outcome confirms that cabinet 1 does not contain the mug I am looking for, and I need to expand my search to other storage locations. +- While this step did not directly advance the task, it helped eliminate one location from consideration. + +[E:ep_200520|S:3] [Reflection] - Moving to cabinet 2 aligns with my plan of systematically searching cabinets for the mug. Cabinet 1 was empty, so it was logical to proceed to the next cabinet. +- This action of opening the next cabinet will help me continue eliminating or identifying potential locations for the mug. +- While cabinet 1 did not contain the mug, the search process is progressing as planned. + +[E:ep_200523|S:4] [Reflection] - Opening cabinet 2 was a necessary step to systematically search for the mug, but it did not contain the item I need. +- This outcome further narrows down potential locations for the mug and reinforces the need to continue the search. +- While I have not yet made direct progress toward the task goal, eliminating empty cabinets is part of the systematic process. + +[E:ep_200526|S:5] [Reflection] - While cabinet 3 contains a bowl, it does not appear to contain a mug. This observation means cabinet 3 is not useful for completing the task. +- The presence of the bowl does not advance my task, so I should continue my search for the mug in other cabinets or storage locations. +- The systematic search process is still valid, as I am eliminating possible locations one by one. + +[E:ep_200529|S:6] [Reflection] - My decision to move to cabinet 4 follows the systematic search strategy, as previous cabinets did not contain the mug. Opening cabinet 4 will either reveal the item I need or eliminate another location from consideration. +- The task requires persistence, as the mug has not yet been found. This step is crucial to maintaining the systematic approach and ensuring all potential locations are checked. + +[E:ep_200532|S:7] [Reflection] - Opening cabinet 4 was a necessary step in the process, but it revealed a plate instead of a mug. This outcome means I need to continue searching other cabinets or storage areas. +- While this step did not directly advance the task, eliminating cabinet 4 as a potential location narrows down the search and keeps the process systematic. +- I must remain persistent and continue checking other cabinets or explore alternative storage locations like drawers or countertops if necessary. + +[E:ep_200536|S:8] [Reflection] - Opening cabinet 5 was a necessary step in the search process, but like cabinets 1 and 2, it was empty. At this point, I have eliminated five cabinets as potential storage locations for the mug. +- This outcome suggests I need to continue searching additional cabinets or consider shifting to alternate storage areas such as drawers or countertops if the cabinets continue to fail. +- Although progress has been slow, each action contributes to systematically narrowing down potential locations. + +[E:ep_200540|S:9] [Reflection] - Moving to cabinet 6 follows the systematic search strategy I have been using. While previous cabinets have not contained the mug, continuing the search is necessary to identify potential storage locations. +- Opening cabinet 6 is the next logical action to either locate the mug or eliminate it as a possible location. +- Persistence in this step-by-step search ensures that I do not overlook any potential storage areas. + +[E:ep_200552|S:1] [Reflection] - The last action successfully positioned me in front of cabinet 1, which is a potential location for a mug. +- Since the cabinet is closed, I need to open it to examine its contents. This action aligns with my strategy to investigate cabinets systematically. +- Progress is being made toward the task goal, as I am systematically narrowing down possible locations for the mug. + +[E:ep_200555|S:2] [Reflection] - The last action of opening cabinet 1 successfully determined that it is empty, allowing me to eliminate it from consideration. +- While this outcome does not immediately advance progress toward finding a mug, it is a necessary part of the systematic search. +- I need to continue investigating other cabinets or drawers to locate a mug. + +[E:ep_200558|S:3] [Reflection] - The last action successfully positioned me in front of cabinet 2, which is my next target for investigation. +- The cabinet is currently closed, so it needs to be opened to examine its contents. +- This action aligns with my systematic search plan, and progress toward the task is being made as I eliminate potential storage locations for a mug. + +[E:ep_200601|S:4] [Reflection] - The last action of opening cabinet 2 confirmed that it is empty, similar to cabinet 1. This means I need to continue my search in other locations. +- While I have not yet found a mug, eliminating empty cabinets is a necessary step in my systematic search. +- I need to focus on the next closest cabinet or another logical location to continue the search. + +[E:ep_200605|S:5] [Reflection] - The last action successfully positioned me in front of cabinet 3, which contains a visible object (a bowl). However, this object is not a mug, so it does not directly help with completing the task. +- While the bowl is visible, it does not fulfill the task requirement, and I need to continue searching for a mug. +- Eliminating cabinet 3 as a potential location for a mug will help narrow down the possibilities and keep progress on track. + +[E:ep_200608|S:6] [Reflection] - The last action successfully positioned me in front of cabinet 4, which is my next target for investigation. +- Since cabinet 4 is closed, it needs to be opened to check its contents. This action aligns with my systematic approach to searching for a mug. +- Progress is steady, as I am narrowing down possible locations for the mug by eliminating or investigating storage areas one by one. + +[E:ep_200612|S:7] [Reflection] - The last action successfully opened cabinet 4, revealing a plate. While the plate does not help with completing the task, eliminating cabinet 4 as a storage location for a mug is progress. +- I need to stay on track with the systematic search and move to the next cabinet or logical location to find the mug. +- The search process is progressing steadily, and I need to continue investigating storage spaces until I locate the required object. + +[E:ep_200615|S:8] [Reflection] - The last action did not result in a meaningful observation, which suggests I need to explicitly interact with cabinet 5 to investigate its contents. +- The systematic search process has been productive in eliminating irrelevant locations, and I need to maintain this approach to find the mug. +- To address the current situation, I will attempt to open cabinet 5 directly and continue the search. + +[E:ep_200619|S:9] [Reflection] - The last action of opening cabinet 5 confirmed that it is empty. While this does not directly contribute to finding the mug, eliminating cabinet 5 as a potential location is progress. +- I need to continue my systematic search, moving to the next logical cabinet or other potential storage areas to locate the mug. +- The systematic approach is working effectively to eliminate locations, and I need to persist until I find the mug. diff --git a/openmanus_rl/environments/prompts/alfworld.py b/openmanus_rl/environments/prompts/alfworld.py index 5ba015b8..568e3534 100644 --- a/openmanus_rl/environments/prompts/alfworld.py +++ b/openmanus_rl/environments/prompts/alfworld.py @@ -20,15 +20,14 @@ Once you've finished your reasoning, you should choose an admissible action for current step and present it within tags. """ -ALFWORLD_OPENMANUS_TEMPLATE = """ +ALFWORLD_OPENMANUS_INITIAL_TEMPLATE = """ You are an expert agent operating in the ALFRED Embodied Environment. Your task is to: {task_description} -Prior to this step, you have already taken {step_count} step(s). Below are the most recent {history_length} observations and the corresponding actions you took: {action_history} -You are now at step {current_step} and your current observation is: {current_observation} -Your admissible actions of the current situation are: [{admissible_actions}]. -Now it's your turn to take an action. Please output your response using the following separated XML tags: +Current observation: {current_observation} +Available actions: [{admissible_actions}] + +Please begin by analyzing the situation and planning your approach: -First, analyze the current situation and plan: Analyze the current situation and devise a plan to accomplish the task: {task_description} What are the key steps needed to complete this task? @@ -36,36 +35,43 @@ How does this action advance our plan toward completing the task? -Then, if this is not the first step (step_count > 0), reflect on the last action: - -Last observation analysis: Have we made progress toward solving the task? -What did the last action accomplish? Was it successful or did it encounter any issues? -Are we closer to completing the task? - - -Next, analyze your memory and past experiences: +Now, present your chosen action: - -RAG-style retrieval from history: + +action_choice: [selected admissible action from the list] +action_parameters: {{relevant details about the action if applicable}} + -[Thinking history - cite specific past reasoning from previous steps] -Example: "At step 3, I reasoned that we needed to find a knife first before attempting to slice..." -Example: "In step 5's thinking, I identified that the fridge typically contains perishable items..." +From now on, I will provide you with observations after each action, and you should respond with memory recall, reflection, thinking, and your next action in this format: -[Observation/Action history - cite specific observations and outcomes] -Example: "Step 2 observation: 'You are in the kitchen. You see a countertop 1, a cabinet 1...' - this revealed the kitchen layout" -Example: "Step 4 action 'go to fridge 1' succeeded and revealed tomato, lettuce..." -Example: "Step 6 failed with 'Nothing happens' when trying to take knife from drawer 2" + +[Recall relevant past experiences and reasoning from our conversation history] +- What similar situations have I encountered? +- What strategies worked or failed before? +- What objects or locations have I discovered? +- What was my previous reasoning and plans? + -[Milestone tracking] -- Completed: Found target object at step X, Successfully picked up item at step Y -- Current state: Holding [items], Located at [location] - + +[Reflect on the last action and its outcome] +- What did my last action accomplish? +- Was it successful or did it encounter issues? +- How does this outcome affect my plan? +- Am I making progress toward the task goal? + -Finally, present your chosen action: + +[Plan the next step based on memory and reflection] +- Given what I've learned, what should I do next? +- How does this action fit into my overall strategy? +- What do I expect this action to achieve? + action_choice: [selected admissible action from the list] action_parameters: {{relevant details about the action if applicable}} -""" \ No newline at end of file +""" + +# Keep the old template name for backward compatibility +ALFWORLD_OPENMANUS_TEMPLATE = ALFWORLD_OPENMANUS_INITIAL_TEMPLATE \ No newline at end of file diff --git a/test/alfworld_rollout.py b/test/alfworld_rollout.py index 993d1e4f..0182154c 100644 --- a/test/alfworld_rollout.py +++ b/test/alfworld_rollout.py @@ -16,7 +16,7 @@ from openmanus_rl.multi_turn_rollout.openmanus_rollout import OpenmanusRollout from openmanus_rl.environments.env_manager import make_envs -from openmanus_rl.environments.prompts.alfworld import ALFWORLD_OPENMANUS_TEMPLATE +from openmanus_rl.environments.prompts.alfworld import ALFWORLD_OPENMANUS_INITIAL_TEMPLATE # Configure logging logging.basicConfig( @@ -83,14 +83,14 @@ def to_dict(self) -> Dict[str, Any]: class LLMAgent: - """Agent that interfaces with LLM APIs for action generation.""" + """Agent that interfaces with LLM APIs using chat-based conversation.""" def __init__(self): # Check environment for API credentials self._setup_api() - self.history = [] + self.chat_history = [] # Store chat messages for conversation flow self.current_task = None - self.step_counter = 0 + self.is_first_turn = True def _setup_api(self): """Configure API based on environment variables.""" @@ -106,70 +106,64 @@ def _setup_api(self): def reset(self, task_description: str): """Reset agent state for new episode.""" - self.history.clear() + self.chat_history = [] self.current_task = task_description - self.step_counter = 0 + self.is_first_turn = True + + # Add system message to initialize the conversation + self.chat_history.append({ + "role": "system", + "content": "You are an expert AI agent solving household tasks in the ALFRED environment." + }) def act(self, observation: str, admissible_actions: List[str]) -> Tuple[str, str]: """ - Generate action based on current observation. + Generate action based on current observation using chat conversation. Returns: Tuple of (raw_response, action) """ - self.step_counter += 1 - - # Build context from recent history - context = self._build_context() + if self.is_first_turn: + # First turn: use initial template with task description + user_message = self._create_initial_prompt(observation, admissible_actions) + self.is_first_turn = False + else: + # Subsequent turns: just provide observation and actions + user_message = self._create_followup_message(observation, admissible_actions) - # Generate prompt using template - prompt = self._create_prompt(observation, admissible_actions, context) + # Add user message to chat history + self.chat_history.append({"role": "user", "content": user_message}) # Get response from LLM or fallback if self.api_enabled: - response = self._query_llm(prompt) + response = self._query_llm_chat() else: response = self._heuristic_action(admissible_actions) - # Update history - self.history.append({ - 'step': self.step_counter, - 'observation': observation[:200], # Truncate for memory - 'response': response - }) + # Add assistant response to chat history + self.chat_history.append({"role": "assistant", "content": response}) - # Keep history bounded - if len(self.history) > 5: - self.history.pop(0) + # Keep chat history bounded (keep system message + last 10 exchanges) + if len(self.chat_history) > 21: # 1 system + 20 user/assistant messages + # Keep system message and last 10 exchanges (20 messages) + self.chat_history = [self.chat_history[0]] + self.chat_history[-20:] return response, self._extract_action(response) - def _build_context(self) -> str: - """Build context string from recent history.""" - if not self.history: - return "No previous actions taken." - - context_parts = [] - for entry in self.history[-3:]: # Last 3 steps - obs_snippet = entry['observation'][:100] - context_parts.append(f"Step {entry['step']}: {obs_snippet}...") - - return "\n".join(context_parts) - - def _create_prompt(self, observation: str, actions: List[str], context: str) -> str: - """Format prompt using the template.""" - return ALFWORLD_OPENMANUS_TEMPLATE.format( + def _create_initial_prompt(self, observation: str, actions: List[str]) -> str: + """Create initial prompt using the template for first turn.""" + return ALFWORLD_OPENMANUS_INITIAL_TEMPLATE.format( task_description=self.current_task or "Complete the task", - step_count=max(0, self.step_counter - 1), - history_length=min(3, len(self.history)), - action_history=context, - current_step=self.step_counter, current_observation=observation, admissible_actions=", ".join(actions) if actions else "none available" ) - def _query_llm(self, prompt: str) -> str: - """Query the LLM API.""" + def _create_followup_message(self, observation: str, actions: List[str]) -> str: + """Create followup message for subsequent turns.""" + return f"Observation: {observation}\n\nAvailable actions: [{', '.join(actions) if actions else 'none available'}]\n\nPlease respond with your memory recall, reflection, thinking, and action as instructed." + + def _query_llm_chat(self) -> str: + """Query the LLM API using chat history.""" try: headers = { "api-key": self.api_key, @@ -180,10 +174,7 @@ def _query_llm(self, prompt: str) -> str: url = f"{self.api_endpoint}/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-13" payload = { - "messages": [ - {"role": "system", "content": "You are an expert AI agent solving household tasks."}, - {"role": "user", "content": prompt} - ], + "messages": self.chat_history, "max_tokens": 1000, "temperature": 0.7 } @@ -215,7 +206,9 @@ def _heuristic_action(self, available_actions: List[str]) -> str: "open cabinet 1", "take mug 1", "go to sinkbasin 1", "clean mug 1", "go to coffeemachine 1", "put mug 1"] - idx = (self.step_counter - 1) % len(action_sequence) + # Use chat history length to determine step + step_num = (len(self.chat_history) - 1) // 2 # Subtract system message, divide by 2 for user/assistant pairs + idx = step_num % len(action_sequence) action = action_sequence[idx] # Check if action is valid @@ -226,7 +219,10 @@ def _heuristic_action(self, available_actions: List[str]) -> str: action = act break - return f"\nExploring environment systematically.\n\n\n\n{action}\n" + if self.is_first_turn: + return f"\nExploring environment systematically for task: {self.current_task}\n\n\n\naction_choice: {action}\n" + else: + return f"\nRecalling previous exploration attempts.\n\n\n\nContinuing systematic exploration.\n\n\n\nNext logical step in exploration.\n\n\n\naction_choice: {action}\n" def _extract_action(self, response: str) -> str: """Extract action from structured response.""" From 268530b15c9525c8af06aa53577cb895fe147eca Mon Sep 17 00:00:00 2001 From: msra800 Date: Thu, 28 Aug 2025 00:16:23 -0700 Subject: [PATCH 2/4] feat: enhance OpenAI API support and fix action parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add support for both OpenAI and Azure OpenAI APIs in alfworld_rollout.py - Fix action parsing to remove quotes from GPT responses - Update modular_stages.py to properly extract action_choice format - Add team member Zijia Liu to README.md 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- README.md | 2 +- .../multi_turn_rollout/modular_stages.py | 17 +++++- test/alfworld_rollout.py | 53 ++++++++++++++----- 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 2b0c1969..67873a31 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Code and dataset coming soon! Stay tuned! ## Current Team Members -[@Kunlun Zhu](https://github.com/Kunlun-Zhu)(Ulab-UIUC), [@Jiayi Zhang](https://github.com/didiforgithub)(MetaGPT), [@Xinbing Liang](https://github.com/mannaandpoem),[@Xiangxin Zhou](https://github.com/zhouxiangxin1998), [@Yanfei Zhang](https://github.com/yanfei-zhang-95), [@Yingxuan Yang](https://github.com/zoe-yyx), [@Zeping Chen](https://github.com/rxdaozhang),[@Weijia Zhang](https://github.com/CharlieDreemur), [@Muxin Tian](https://github.com/realtmxi), [@Haofei Yu](https://github.com/lwaekfjlk)(Ulab-UIUC), [@Jinyu Xiang](https://github.com/XiangJinyu), [@Yifan Wu](https://github.com/Evanwu50020), [@Bowen Jin](https://github.com/PeterGriffinJin), [@Blair Yang](https://github.com/blairyeung) +[@Kunlun Zhu](https://github.com/Kunlun-Zhu)(Ulab-UIUC), [@Jiayi Zhang](https://github.com/didiforgithub)(MetaGPT), [@Xinbing Liang](https://github.com/mannaandpoem),[@Xiangxin Zhou](https://github.com/zhouxiangxin1998), [@Yanfei Zhang](https://github.com/yanfei-zhang-95), [@Yingxuan Yang](https://github.com/zoe-yyx), [@Zeping Chen](https://github.com/rxdaozhang),[@Weijia Zhang](https://github.com/CharlieDreemur), [@Muxin Tian](https://github.com/realtmxi), [@Haofei Yu](https://github.com/lwaekfjlk)(Ulab-UIUC), [@Jinyu Xiang](https://github.com/XiangJinyu), [@Yifan Wu](https://github.com/Evanwu50020), [@Bowen Jin](https://github.com/PeterGriffinJin), [@Blair Yang](https://github.com/blairyeung), [@Zijia Liu](https://m-serious.github.io/) --- diff --git a/openmanus_rl/multi_turn_rollout/modular_stages.py b/openmanus_rl/multi_turn_rollout/modular_stages.py index b87f16e4..4f86105e 100644 --- a/openmanus_rl/multi_turn_rollout/modular_stages.py +++ b/openmanus_rl/multi_turn_rollout/modular_stages.py @@ -270,7 +270,22 @@ def parse_simple(self, text: str) -> Dict[str, Optional[str]]: tag_pattern = tag.replace(' ', r'\s*') pattern = f'<{tag_pattern}>(.*?)' match = re.search(pattern, text, re.DOTALL | re.IGNORECASE) - result[tag.replace(' ', '_')] = match.group(1).strip() if match else None + content = match.group(1).strip() if match else None + + # Special handling for action tag to extract action_choice + if tag == 'action' and content: + if 'action_choice:' in content: + parts = content.split('action_choice:') + if len(parts) > 1: + action = parts[1].split('\n')[0].strip() + # Remove quotes if present + action = action.strip("'\"") + content = action + else: + # Remove quotes from first line + content = content.split('\n')[0].strip().strip("'\"") + + result[tag.replace(' ', '_')] = content return result diff --git a/test/alfworld_rollout.py b/test/alfworld_rollout.py index 0182154c..37b3fec8 100644 --- a/test/alfworld_rollout.py +++ b/test/alfworld_rollout.py @@ -94,12 +94,22 @@ def __init__(self): def _setup_api(self): """Configure API based on environment variables.""" - self.api_key = os.getenv('OAI_KEY') - self.api_endpoint = os.getenv('OAI_ENDPOINT') + # Support both OpenAI and Azure OpenAI + self.api_key = os.getenv('OPENAI_API_KEY') or os.getenv('OAI_KEY') + self.api_base = os.getenv('OPENAI_API_BASE') or os.getenv('OAI_ENDPOINT') + self.api_type = os.getenv('OPENAI_API_TYPE', 'openai') # 'openai' or 'azure' - if self.api_key and self.api_endpoint: + if self.api_key: self.api_enabled = True - logger.info(f"API configured: {self.api_endpoint[:30]}...") + if self.api_type == 'azure' and self.api_base: + logger.info(f"Azure OpenAI configured: {self.api_base[:30]}...") + elif self.api_type == 'openai': + if not self.api_base: + self.api_base = 'https://api.openai.com' + logger.info(f"OpenAI API configured") + else: + self.api_enabled = False + logger.warning("Invalid API configuration") else: self.api_enabled = False logger.warning("No API credentials found, using heuristic fallback") @@ -165,20 +175,30 @@ def _create_followup_message(self, observation: str, actions: List[str]) -> str: def _query_llm_chat(self) -> str: """Query the LLM API using chat history.""" try: - headers = { - "api-key": self.api_key, - "Content-Type": "application/json" - } - - # Azure OpenAI format - url = f"{self.api_endpoint}/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-13" + # Set headers based on API type + if self.api_type == 'azure': + headers = { + "api-key": self.api_key, + "Content-Type": "application/json" + } + url = f"{self.api_base}/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-13" + else: # OpenAI API + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + url = f"{self.api_base}/v1/chat/completions" payload = { + "model": "gpt-4o" if self.api_type == 'openai' else None, # OpenAI needs model in payload "messages": self.chat_history, "max_tokens": 1000, "temperature": 0.7 } + # Remove None values from payload + payload = {k: v for k, v in payload.items() if v is not None} + response = requests.post(url, headers=headers, json=payload, timeout=30) if response.status_code == 200: @@ -235,10 +255,15 @@ def _extract_action(self, response: str) -> str: if 'action_choice:' in action_text: parts = action_text.split('action_choice:') if len(parts) > 1: - return parts[1].split('\n')[0].strip() + action = parts[1].split('\n')[0].strip() + # Remove quotes if present + action = action.strip("'\"") + return action - # Return first line if no special format - return action_text.split('\n')[0].strip() + # Return first line if no special format, removing quotes + action = action_text.split('\n')[0].strip() + action = action.strip("'\"") + return action # Smarter fallback: try to extract meaningful action from response response_lower = response.lower() From 6cd93427f1e84f844852298760da8fd9f5845d0d Mon Sep 17 00:00:00 2001 From: Murphy Tian Date: Thu, 28 Aug 2025 10:28:49 +0000 Subject: [PATCH 3/4] feat: add .env.example for OpenAI API configuration --- .env.example | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..5e4cd425 --- /dev/null +++ b/.env.example @@ -0,0 +1,2 @@ +OPENAI_API_BASE= +OPENAI_API_KEY= \ No newline at end of file From 1c446101cd2b747a9d71072c3ae720c544d0bd32 Mon Sep 17 00:00:00 2001 From: Murphy Tian Date: Thu, 28 Aug 2025 10:29:45 +0000 Subject: [PATCH 4/4] feat: improve API handling in alfworld_rollout.py - Load environment variables from .env file for API configuration - Remove fallback heuristic action; raise errors for API issues - Ensure LLM response is only fetched when API is enabled --- test/alfworld_rollout.py | 42 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/test/alfworld_rollout.py b/test/alfworld_rollout.py index 37b3fec8..89e28062 100644 --- a/test/alfworld_rollout.py +++ b/test/alfworld_rollout.py @@ -7,13 +7,15 @@ from datetime import datetime from typing import List, Dict, Any, Optional, Tuple from dataclasses import dataclass, asdict - +from dotenv import load_dotenv import requests # Configure project imports PROJECT_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(PROJECT_ROOT)) +load_dotenv(PROJECT_ROOT / '.env') + from openmanus_rl.multi_turn_rollout.openmanus_rollout import OpenmanusRollout from openmanus_rl.environments.env_manager import make_envs from openmanus_rl.environments.prompts.alfworld import ALFWORLD_OPENMANUS_INITIAL_TEMPLATE @@ -144,11 +146,11 @@ def act(self, observation: str, admissible_actions: List[str]) -> Tuple[str, str # Add user message to chat history self.chat_history.append({"role": "user", "content": user_message}) - # Get response from LLM or fallback - if self.api_enabled: - response = self._query_llm_chat() - else: - response = self._heuristic_action(admissible_actions) + # Get response from LLM only, no fallback + if not self.api_enabled: + raise RuntimeError("API not configured. Please set OPENAI_API_KEY and OPENAI_API_BASE") + + response = self._query_llm_chat() # Add assistant response to chat history self.chat_history.append({"role": "assistant", "content": response}) @@ -213,36 +215,12 @@ def _query_llm_chat(self) -> str: return content else: logger.error(f"API error {response.status_code}: {response.text[:200]}") - return self._heuristic_action([]) + raise RuntimeError(f"API request failed with status {response.status_code}: {response.text[:200]}") except Exception as e: logger.error(f"API exception: {e}") - return self._heuristic_action([]) + raise RuntimeError(f"API request failed: {e}") - def _heuristic_action(self, available_actions: List[str]) -> str: - """Simple heuristic for action selection when API unavailable.""" - # Basic exploration strategy - action_sequence = ["look", "inventory", "go to kitchen", "go to cabinet 1", - "open cabinet 1", "take mug 1", "go to sinkbasin 1", - "clean mug 1", "go to coffeemachine 1", "put mug 1"] - - # Use chat history length to determine step - step_num = (len(self.chat_history) - 1) // 2 # Subtract system message, divide by 2 for user/assistant pairs - idx = step_num % len(action_sequence) - action = action_sequence[idx] - - # Check if action is valid - if available_actions and action not in str(available_actions): - # Try to find a similar valid action - for act in available_actions: - if any(keyword in act.lower() for keyword in ['go', 'take', 'put', 'open']): - action = act - break - - if self.is_first_turn: - return f"\nExploring environment systematically for task: {self.current_task}\n\n\n\naction_choice: {action}\n" - else: - return f"\nRecalling previous exploration attempts.\n\n\n\nContinuing systematic exploration.\n\n\n\nNext logical step in exploration.\n\n\n\naction_choice: {action}\n" def _extract_action(self, response: str) -> str: """Extract action from structured response."""