World-In-World · vanillaer · Apr 3, 2026 · Mar 26, 2026
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "third_party/libero"]
+	path = third_party/libero
+	url = https://github.com/Lifelong-Robot-Learning/LIBERO.git
diff --git a/docs/01_setup_env.md b/docs/01_setup_env.md
@@ -193,6 +193,43 @@ pip install -r requirements.txt # other required packages
 
 If you meet anything like `qt.qpa.plugin: Could not find the Qt platform plugin "xcb"`, try `pip uninstall opencv-python opencv-python-headless` and `pip install opencv-python-headless==4.11.0.86`
 
+### Setup LIBERO backend with uv (recommended)
+
+If you want to run the `libero_object` / `libero_spatial` backend for manipulation, we recommend using a dedicated `uv` environment.
+
+From the repository root:
+
+```bash
+cd /path/to/world-in-world
+git submodule update --init --recursive third_party/libero
+
+uv venv --python 3.8 downstream/world-in-world-manip/.venv-libero
+source downstream/world-in-world-manip/.venv-libero/bin/activate
+
+uv pip sync \
+  downstream/world-in-world-manip/requirements_libero.txt \
+  third_party/libero/requirements.txt \
+  --extra-index-url https://download.pytorch.org/whl/cu113 \
+  --index-strategy=unsafe-best-match
+
+uv pip install -e third_party/libero
+
+export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/path/to/world-in-world/third_party/libero:/path/to/world-in-world/downstream/world-in-world-manip"
+export LIBERO_CONFIG_PATH=/path/to/world-in-world/.cache/libero
+```
+
+### Start LIBERO environment server
+
+After the setup above:
+
+```bash
+cd /path/to/world-in-world/downstream/world-in-world-manip
+source .venv-libero/bin/activate
+export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/path/to/world-in-world/third_party/libero:/path/to/world-in-world/downstream/world-in-world-manip"
+export LIBERO_CONFIG_PATH=/path/to/world-in-world/.cache/libero
+bash scripts/run_libero_env_server.sh 127.0.0.1 8765
+```
+
 ### Install 3D-Diffuser-Actor (for diff-base and diff-igenex)
 
 ```bash

diff --git a/docs/03_run_commands.md b/docs/03_run_commands.md
@@ -218,17 +218,60 @@ CUDA_VISIBLE_DEVICES="0" bash scripts/run_manip.sh \
 - `<vllm_hosts>`: hostname and port for vLLM server (same format as other tasks, e.g., `localhost:8010`)
 - `<igenex_host>`: host for world model server (same format as other tasks, e.g., `localhost:6010`)
 
-**Example** for `vlm-base` with `Qwen2.5-VL-72B-Instruct-AWQ`, exp_id `09.12_qwen_base`, 1 worker, vLLM at `localhost:8010`, and world model at `localhost:6010`:
+**LIBERO prerequisite:** start the LIBERO env server first (in `downstream/world-in-world-manip`):
+```bash
+source .venv-libero/bin/activate
+export LIBERO_CONFIG_PATH=/path/to/world-in-world/.cache/libero
+export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/path/to/world-in-world/third_party/libero:/path/to/world-in-world/downstream/world-in-world-manip"
+bash scripts/run_libero_env_server.sh 127.0.0.1 8765
+```
+
+**Example A (`vlm-base`, no world model)**
 ```bash
 CUDA_VISIBLE_DEVICES="0" bash scripts/run_manip.sh \
     vlm-base \
-    09.12_qwen_base \
+    09.12_libero_base \
     Qwen/Qwen2.5-VL-72B-Instruct-AWQ \
     1 \
     "localhost:8010" \
-    "localhost:6010"
+    "localhost:7000" \
+    manip_backend=libero \
+    libero_env_url=http://127.0.0.1:8765 \
+    down_sample_ratio=0.02 \
+    n_shots=2 \
+    'eval_sets=[libero_object]'
+```
+To run spatial instead of object, change `eval_sets` to `'eval_sets=[libero_spatial]'`.
+
+**Example B (`vlm-igenex`, with world model)**
+
+Start a world model manager first (from repo root):
+```bash
+CUDA_VISIBLE_DEVICES="0,1" bash downstream/scripts/init_worldmodel_manager.sh \
+    09.12_libero_wm \
+    2 \
+    <wm_type> \
+    --task_type=manipulation
 ```
 
+Then run manipulation (from `downstream/world-in-world-manip`):
+```bash
+CUDA_VISIBLE_DEVICES="0" bash scripts/run_manip.sh \
+    vlm-igenex \
+    09.12_libero_igenex \
+    Qwen/Qwen2.5-VL-72B-Instruct-AWQ \
+    1 \
+    "localhost:8010" \
+    "localhost:7000" \
+    manip_backend=libero \
+    libero_env_url=http://127.0.0.1:8765 \
+    wm_condition_mode=zero_shot_text \
+    down_sample_ratio=0.02 \
+    n_shots=2 \
+    'eval_sets=[libero_object]'
+```
+To run spatial instead of object, change `eval_sets` to `'eval_sets=[libero_spatial]'`.
+
 ---
 
 [↩︎ Back to Getting Started Checklist](../README.md#2-checklist-for-running-an-evaluation)
@@ -311,4 +354,4 @@ python wiw_manip/aggregate_results.py running/vlm-base/Qwen2.5-VL-72B-Instruct-A
 
 [↩︎ Back to Getting Started Checklist](../README.md#2-checklist-for-running-an-evaluation)
 
----
+---
diff --git a/downstream/world-in-world-manip/.gitignore b/downstream/world-in-world-manip/.gitignore
@@ -8,6 +8,7 @@ temp/
 src/
 outputs/
 running/
+.venv-libero/
 
 *.png
 *.tar.gz
@@ -23,4 +24,3 @@ code_organization_note.md
 docs/
 test.sh
 scripts/test_manip.sh
-
diff --git a/downstream/world-in-world-manip/requirements.txt b/downstream/world-in-world-manip/requirements.txt
@@ -1,5 +1,6 @@
 numpy==1.26.4
 Pillow
+PyYAML
 pyquaternion
 setuptools==75.6.0
 natsort

diff --git a/downstream/world-in-world-manip/requirements_libero.txt b/downstream/world-in-world-manip/requirements_libero.txt
@@ -0,0 +1,17 @@
+numpy==1.22.4
+Pillow
+PyYAML
+scipy
+gymnasium
+importlib-metadata
+opencv-python-headless==4.11.0.86
+hydra-core==1.2.0
+matplotlib==3.5.3
+cloudpickle==2.1.0
+bddl==1.0.1
+robosuite==1.4.1
+mujoco==3.2.3
+ultralytics
+torch==1.11.0+cu113  # +cu113 wheels: install with --extra-index-url https://download.pytorch.org/whl/cu113
+torchvision==0.12.0+cu113
+torchaudio==0.11.0+cu113
diff --git a/downstream/world-in-world-manip/scripts/libero_env_server.py b/downstream/world-in-world-manip/scripts/libero_env_server.py
@@ -0,0 +1,199 @@
+"""HTTP server exposing a single LocalLiberoEnv instance through a JSON API.
+
+Routes:
+    GET  /health                  liveness probe; reports whether an env is configured
+    POST /configure               (re)create the environment from the request payload
+    POST /init_dataset_and_tasks  forward to ENV.init_dataset_and_tasks
+    POST /reset                   forward to ENV.reset
+    POST /step                    forward to ENV.step
+    POST /render                  return encoded camera images and annotation points
+    POST /close                   close and drop the environment
+
+The server is a plain (non-threading) HTTPServer, so requests are handled one at a
+time and the module-level ENV is only touched by one request at once.
+"""
+import argparse
+import base64
+import json
+import logging
+import threading
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+import numpy as np
+
+from wiw_manip.envs.libero_env_core import LocalLiberoEnv
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("libero_env_server")
+
+# The one environment served by this process; None until POST /configure succeeds.
+ENV = None
+
+
+def _close_env():
+    """Close and forget the active environment, if there is one.
+
+    ENV is cleared before close() runs so that a failing close() cannot leave a
+    half-closed environment registered and block later /configure calls.
+    """
+    global ENV
+    stale, ENV = ENV, None
+    if stale is not None:
+        stale.close()
+
+
+def _to_jsonable(value):
+    """Recursively convert numpy arrays/scalars inside ``value`` to JSON-friendly builtins.
+
+    dicts, lists and tuples are walked (tuples become lists); anything that is not a
+    recognised numpy type is returned unchanged.
+    """
+    if isinstance(value, dict):
+        return {k: _to_jsonable(v) for k, v in value.items()}
+    if isinstance(value, (list, tuple)):
+        return [_to_jsonable(v) for v in value]
+    if isinstance(value, np.ndarray):
+        return value.tolist()
+    if isinstance(value, np.bool_):
+        return bool(value)
+    if isinstance(value, np.integer):
+        return int(value)
+    if isinstance(value, np.floating):
+        return float(value)
+    return value
+
+
+def _json_response(handler, payload, status=200):
+    """Serialize ``payload`` as UTF-8 JSON and write it as the full HTTP response."""
+    body = json.dumps(_to_jsonable(payload)).encode("utf-8")
+    handler.send_response(status)
+    handler.send_header("Content-Type", "application/json")
+    handler.send_header("Content-Length", str(len(body)))
+    handler.end_headers()
+    handler.wfile.write(body)
+
+
+class Handler(BaseHTTPRequestHandler):
+    """Request handler that maps HTTP routes onto the module-level ENV."""
+
+    @staticmethod
+    def _thread_id():
+        """Return the current thread identifier (used for log messages only)."""
+        return threading.get_ident()
+
+    def _read_json(self):
+        """Parse the request body as JSON; an empty body is treated as ``{}``.
+
+        Raises ValueError (including json.JSONDecodeError / UnicodeDecodeError) when
+        the Content-Length header or the body is malformed.
+        """
+        length = int(self.headers.get("Content-Length", "0"))
+        raw = self.rfile.read(length) if length > 0 else b"{}"
+        return json.loads(raw.decode("utf-8"))
+
+    def do_GET(self):
+        """Serve GET /health; every other path gets a JSON 404."""
+        if self.path == "/health":
+            _json_response(self, {"ok": True, "configured": ENV is not None})
+            return
+        _json_response(self, {"error": "not found"}, status=404)
+
+    def do_POST(self):
+        """Dispatch a POST route to the environment.
+
+        Responds 400 for a malformed body or an unconfigured server, 404 for an
+        unknown path, and 500 (with the exception text) when the handler raises.
+        """
+        global ENV
+        try:
+            # Parsed inside its own try so a bad body yields a JSON 400 instead of
+            # an unhandled exception that drops the connection without a response.
+            payload = self._read_json()
+        except ValueError as exc:
+            logger.warning("Rejecting POST %s: invalid JSON body (%s)", self.path, exc)
+            _json_response(self, {"error": f"invalid JSON body: {exc}"}, status=400)
+            return
+        try:
+            if self.path == "/configure":
+                logger.info("POST /configure on thread_id=%s", self._thread_id())
+                _close_env()
+                ENV = LocalLiberoEnv(
+                    eval_set=payload["eval_set"],
+                    img_size=tuple(payload["img_size"]),
+                    down_sample_ratio=payload["down_sample_ratio"],
+                    log_path=payload["log_path"],
+                    max_step=payload["max_step"],
+                    action_repeat=int(payload.get("action_repeat", 50)),
+                )
+                _json_response(self, {"state": ENV.export_state()})
+                return
+            if ENV is None:
+                _json_response(self, {"error": "server not configured"}, status=400)
+                return
+            if self.path == "/init_dataset_and_tasks":
+                ENV.init_dataset_and_tasks(
+                    eval_task=payload["eval_task"],
+                    down_sample_ratio=payload["down_sample_ratio"],
+                    log_path=payload["log_path"],
+                )
+                _json_response(self, {"state": ENV.export_state()})
+                return
+            if self.path == "/reset":
+                ENV.reset()
+                _json_response(self, {"state": ENV.export_state()})
+                return
+            if self.path == "/step":
+                logger.info("POST /step on thread_id=%s", self._thread_id())
+                _, reward, done, info = ENV.step(payload["action"], debug_payload=payload.get("debug_payload"))
+                _json_response(self, {"state": ENV.export_state(), "reward": reward, "done": done, "info": info})
+                return
+            if self.path == "/render":
+                logger.info("POST /render on thread_id=%s", self._thread_id())
+                camera_views = payload.get("camera_views")
+                images = ENV.render_images(camera_views)
+                annotation_points = ENV.get_annotation_points(camera_views)
+                # NOTE(review): the helper is named encode_png_base64 yet its result is
+                # base64-encoded again here; confirm clients expect that before changing.
+                encoded = {
+                    key: base64.b64encode(ENV.encode_png_base64(image)).decode("utf-8")
+                    for key, image in images.items()
+                }
+                _json_response(self, {"images": encoded, "annotation_points": annotation_points, "state": ENV.export_state()})
+                return
+            if self.path == "/close":
+                _close_env()
+                _json_response(self, {"ok": True})
+                return
+            _json_response(self, {"error": "not found"}, status=404)
+        except Exception as exc:
+            logger.exception("Request failed")
+            _json_response(self, {"error": str(exc)}, status=500)
+
+    def log_message(self, format, *args):
+        """Route http.server's access log lines through the module logger."""
+        logger.info("%s - %s", self.address_string(), format % args)
+
+
+def main():
+    """Parse --host/--port and serve until the process is interrupted."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", default="127.0.0.1")
+    parser.add_argument("--port", type=int, default=8765)
+    args = parser.parse_args()
+
+    server = HTTPServer((args.host, args.port), Handler)
+    logger.info("Starting LIBERO env server (single-thread) on http://%s:%d", args.host, args.port)
+    try:
+        server.serve_forever()
+    finally:
+        # Release the listening socket and the simulator even on Ctrl-C or a crash.
+        server.server_close()
+        try:
+            _close_env()
+        except Exception:
+            logger.exception("Failed to close LIBERO env during shutdown")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/downstream/world-in-world-manip/scripts/run_manip.sh b/downstream/world-in-world-manip/scripts/run_manip.sh
@@ -23,6 +23,21 @@ igenex_host_csv="$6"
 shift 6
 extra_args=("$@")         # any additional Hydra/CLI args
 
+# LIBERO default: one high-level VLM action is executed as repeated absolute-target updates.
+# Scan the extra Hydra overrides; the last manip_backend=... value seen is kept.
+manip_backend=""
+has_libero_action_repeat="0"
+for arg in "${extra_args[@]}"; do
+  case "${arg}" in
+    manip_backend=*) manip_backend="${arg#*=}" ;;
+    libero_action_repeat=*) has_libero_action_repeat="1" ;;
+  esac
+done
+# Append the default repeat count only for LIBERO runs that did not set one explicitly.
+if [[ "${manip_backend}" == "libero" ]] && [[ "${has_libero_action_repeat}" == "0" ]]; then
+  extra_args+=("libero_action_repeat=50")
+fi
+
 # Set model_type to "remote" as default
 model_type="remote"
 

diff --git a/downstream/world-in-world-manip/wiw_manip/configs/config.yaml b/downstream/world-in-world-manip/wiw_manip/configs/config.yaml
@@ -19,6 +19,10 @@ visual_icl: null
 tp: null
 log_level: null
 igenex_host: null
+wm_condition_mode: null
 enable_path_obs: null
+manip_backend: rlbench
+libero_env_url: http://127.0.0.1:8765
+libero_action_repeat: 50
 vlm__temperature: 1.0
-vlm__top_k: -1
+vlm__top_k: -1
diff --git a/downstream/world-in-world-manip/wiw_manip/configs/vlm-base.yaml b/downstream/world-in-world-manip/wiw_manip/configs/vlm-base.yaml
@@ -19,7 +19,10 @@ exp_name: 06.26_baseline_debug
 visual_icl: 0
 tp: 1
 proposal_num: 1
-executed_action_per_step: 5
+executed_action_per_step: 6
 use_last_action: False
 enable_path_obs: False
-max_step: 15
+max_step: 12
+manip_backend: rlbench
+libero_env_url: http://127.0.0.1:8765
+libero_action_repeat: 100  # NOTE(review): config.yaml, scripts/run_manip.sh and the env server all default to 50 — confirm 100 is intended
diff --git a/downstream/world-in-world-manip/wiw_manip/configs/vlm-igenex.yaml b/downstream/world-in-world-manip/wiw_manip/configs/vlm-igenex.yaml
@@ -17,11 +17,12 @@ resolution: 500
 exp_name: 06.26_withWM_igen_debug
 visual_icl: 0
 tp: 1
-executed_action_per_step: 5
+executed_action_per_step: 6
 use_last_action: False
 proposal_num: 4
 igenex_host: localhost:6010
+wm_condition_mode: zero_shot_text
 pred_img_size: 384
 mpc_mode: ranking  # "iterative" or "ranking"
 enable_path_obs:  False
-max_step: 15
+max_step: 15