From cd5f37024a7e7abf42e13bff97ad6411cf5f8f7c Mon Sep 17 00:00:00 2001 From: ffffiuling <17692510923@163.com> Date: Sat, 7 Mar 2026 23:38:59 +0800 Subject: [PATCH 1/2] LATS/programming --- applications/lats/README.md | 113 ++++ applications/lats/__init__.py | 1 + applications/lats/assets/config/.gitkeep | 0 applications/lats/assets/config/defaults.json | 5 + applications/lats/components/__init__.py | 1 + applications/lats/components/agents.py | 201 +++++++ applications/lats/components/formatters.py | 36 ++ applications/lats/components/tree.py | 84 +++ applications/lats/humaneval/__init__.py | 1 + applications/lats/humaneval/executor.py | 82 +++ applications/lats/humaneval/load.py | 53 ++ applications/lats/humaneval/timeout_utils.py | 31 ++ applications/lats/main.py | 176 +++++++ applications/lats/utils/__init__.py | 1 + applications/lats/utils/tee.py | 29 ++ applications/lats/workflows/__init__.py | 1 + applications/lats/workflows/controller.py | 103 ++++ applications/lats/workflows/graph.py | 149 ++++++ masfactory-visualizer/package-lock.json | 493 +++++++----------- .../webview-ui/package-lock.json | 4 - package-lock.json | 6 + 21 files changed, 1265 insertions(+), 305 deletions(-) create mode 100644 applications/lats/README.md create mode 100644 applications/lats/__init__.py create mode 100644 applications/lats/assets/config/.gitkeep create mode 100644 applications/lats/assets/config/defaults.json create mode 100644 applications/lats/components/__init__.py create mode 100644 applications/lats/components/agents.py create mode 100644 applications/lats/components/formatters.py create mode 100644 applications/lats/components/tree.py create mode 100644 applications/lats/humaneval/__init__.py create mode 100644 applications/lats/humaneval/executor.py create mode 100644 applications/lats/humaneval/load.py create mode 100644 applications/lats/humaneval/timeout_utils.py create mode 100644 applications/lats/main.py create mode 100644 applications/lats/utils/__init__.py 
create mode 100644 applications/lats/utils/tee.py create mode 100644 applications/lats/workflows/__init__.py create mode 100644 applications/lats/workflows/controller.py create mode 100644 applications/lats/workflows/graph.py create mode 100644 package-lock.json diff --git a/applications/lats/README.md b/applications/lats/README.md new file mode 100644 index 0000000..50767d9 --- /dev/null +++ b/applications/lats/README.md @@ -0,0 +1,113 @@ +# LATS (Language Agent Tree Search) – HumanEval on MASFactory + +This directory is a [MASFactory](https://github.com/BUPT-GAMMA/MASFactory) application that reproduces **LATS** (Language Agent Tree Search) on the **HumanEval** (programming) benchmark. + +- **Paper**: [Language Agent Tree Search Unifies Reasoning Acting and Planning in Language Models](https://arxiv.org/abs/2310.04406) (ICML 2024) +- **Upstream reference**: [LanguageAgentTreeSearch](https://github.com/andyz245/LanguageAgentTreeSearch) (programming / HumanEval) + +## Layout + +``` +lats/ +├── main.py # Entry: argparse, load dataset, build graph, run loop, tee to log +├── README.md +├── assets/ +│ └── config/ # Config (default dataset path, etc.); datasets not in repo +│ └── defaults.json +├── workflows/ # Graph and controller +│ ├── graph.py # Build RootGraph, LATSTemplate, run_one_problem +│ └── controller.py # lats_controller_logic (MCTS select / expand / backprop / terminate) +├── components/ # Custom MASFactory components +│ ├── formatters.py # ContentMessageFormatter, passthrough dicts +│ ├── agents.py # LATSBaseAgent, LATSLLMAgent, ReflectionAgent, HumanEval executor +│ └── tree.py # LATSNode, TreeManager, gather_context_from_tree +├── humaneval/ # HumanEval data and execution +│ ├── load.py # load_humaneval_jsonl, parse_internal_tests_from_test, extract_python_code +│ ├── executor.py # run_internal_tests, full_evaluate, verify_evaluation +│ └── timeout_utils.py # function_with_timeout +└── utils/ + └── tee.py # Tee output to terminal and optional log file 
+``` + +## Context and memory in this port + +In the LATS paper and some references, **context** and **memory** appear as conceptual (or explicit) elements. In this MASFactory application we do **not** add separate **Context** or **Memory** nodes. They are implemented as follows. + +### Context + +**Role:** Provide the LLM with the accumulated trajectory (previous code attempts, test results, and reflections) so it can produce the next, improved attempt. + +**Implementation:** Context is built **inside the controller** and passed to the LLM via the existing message flow: + +1. After each **Reflection** step, the controller selects the next node (MCTS selection) and gets the path from that node back to the root. +2. `**gather_context_from_tree(selected)`** in `components/tree.py` collects along that path: + - previous **solutions** (code), + - **test_feedback** (unit test results), + - **reflections** (short explanations of failure). +3. The controller assembles these into a single string `**reflexion_prompt`** (with blocks like `[previous impl 1]`, `[unit test results 1]`, `[reflection 1]`, etc.). +4. `**reflexion_prompt**` is passed to **LLM_Agent** as the prompt for the next iteration. + +So “context” is **inlined into the prompt**: it is computed in `workflows/controller.py` and carried in the message key `reflexion_prompt` to the LLM node, without a dedicated Context node. + +### Memory + +**Role:** Persist the search tree (all tried solutions, feedback, rewards, and structure) across loop iterations. + +**Implementation:** Memory is the **search tree** maintained by the controller: + +1. `**LATSNode`** (in `components/tree.py`) stores per-node state: `solution`, `test_feedback`, `reflection`, `value`, `visits`, `parent`, `children`. +2. `**TreeManager**` holds the `root`, `current_node`, and `_max_iters`, and implements **selection** (UCT), **backprop** (reward update), and tree growth (adding children when the Executor returns a new attempt). +3. 
The controller **reads and updates** this tree each loop: it appends new children, runs backprop, and uses `gather_context_from_tree` to build the next context. + +So “memory” is the **tree state** (nodes + manager) owned and updated by the controller logic; there is no separate Memory agent or node. The graph nodes you see in MASFactory are only: **LLM_Agent**, **Executor**, **Reflection**, and the **controller** (Loop’s terminate function). Context and memory are implemented **inside** the controller and the shared tree, not as extra nodes. + +## Setup + +From the repo root (parent of `lats/`): + +```bash +# Install MASFactory and dependencies (openai, etc.) +pip install masfactory openai + +# Optional: set default dataset in assets/config/defaults.json +# "dataset_path": "path/to/HumanEval.jsonl.gz" +``` + +Environment variables: + +- **OPENAI_API_KEY** (required) +- **OPENAI_API_BASE** (optional, for proxy/custom endpoint) +- **LATS_MODEL** (optional, default `gpt-4`) +- **LATS_MAX_ITERS** (optional, default `8`) +- **NUMBER_OF_TESTS** (optional, default `2`) +- **MASFACTORY_VISUALIZER_PORT** (optional, for runtime view) + +## Run + +From the repo root (e.g. `D:\PE`): + +```bash +# Default dataset path may be read from assets/config/defaults.json +python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --log logs/lats.log +``` + +Examples: + +```bash +# Limit to 5 problems, write same output to log file +python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --limit 5 --log logs/lats.log + +# Print every attempt (not only final solution) +python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --print-code --log logs/lats.log + +# Paper-aligned defaults: max_iters=8, number_of_tests=2 (no need to pass if using env or defaults) +python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --log logs/lats.log +``` + +Output is printed to the terminal and, when `--log` is set, appended to the given file. 
+ +## Metrics + +- **Pass@1**: fraction of problems for which the best solution passes the full HumanEval test. +- Defaults align with the upstream GPT-4 run script: `max_iters=8`, `number_of_tests=2`. + diff --git a/applications/lats/__init__.py b/applications/lats/__init__.py new file mode 100644 index 0000000..aeb3319 --- /dev/null +++ b/applications/lats/__init__.py @@ -0,0 +1 @@ +# LATS application (HumanEval on MASFactory) diff --git a/applications/lats/assets/config/.gitkeep b/applications/lats/assets/config/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/applications/lats/assets/config/defaults.json b/applications/lats/assets/config/defaults.json new file mode 100644 index 0000000..46ead8c --- /dev/null +++ b/applications/lats/assets/config/defaults.json @@ -0,0 +1,5 @@ +{ + "dataset_path": "", + "max_iters": 8, + "number_of_tests": 2 +} diff --git a/applications/lats/components/__init__.py b/applications/lats/components/__init__.py new file mode 100644 index 0000000..e813804 --- /dev/null +++ b/applications/lats/components/__init__.py @@ -0,0 +1 @@ +# LATS custom components (formatters, agents, tree) diff --git a/applications/lats/components/agents.py b/applications/lats/components/agents.py new file mode 100644 index 0000000..474aa49 --- /dev/null +++ b/applications/lats/components/agents.py @@ -0,0 +1,201 @@ +""" +LATS agents: base, LLM, Reflection (and Executor implemented as ReflectionAgent with role=executor). +""" +import os +from masfactory import Agent, OpenAIModel +from masfactory.core.message import ParagraphMessageFormatter + +from . 
import formatters as fmt +from .tree import LATSNode +from ..humaneval.load import extract_python_code, parse_internal_tests_from_test +from ..humaneval.executor import run_internal_tests, full_evaluate +from ..utils.tee import tee, get_log_file + +# Model instance (injected or from env) +model_instance = OpenAIModel( + api_key=os.environ.get("OPENAI_API_KEY", ""), + base_url=os.environ.get("OPENAI_API_BASE", ""), + model_name=os.environ.get("LATS_MODEL", "gpt-4"), +) + +# When True, print each attempt body to terminal and log (--print-code) +_print_code_attempts = False + + +def set_print_code_attempts(value: bool): + global _print_code_attempts + _print_code_attempts = value + + +_ENV_PUSH_KEYS = { + "observation": "observation", + "reward": "reward", + "action": "action", + "full_passed": "full_passed", +} + + +def _print_generated_func_body(func_body: str, problem_name: str = "") -> None: + """Print generated code to terminal and log (if --log and --print-code).""" + if not _print_code_attempts: + return + title = "GENERATED FUNC BODY" + if problem_name: + title += f" [{problem_name}]" + tee(f"\n--------------------- {title} ---------------------", get_log_file()) + tee(func_body, get_log_file()) + tee("------------------------------------------\n", get_log_file()) + + +def _run_humaneval_forward(input_dict: dict) -> dict: + """HumanEval execution (originally HumanEvalEnvironment._forward). 
Used by ReflectionAgent with role=executor.""" + content = input_dict.get("action", "") or input_dict.get("content", "") + raw = str(content).strip() + problem = input_dict.get("problem") or {} + internal_tests = input_dict.get("internal_tests") or [] + entry_point = problem.get("entry_point", "") + test = problem.get("test", "") + prompt = problem.get("prompt", "") + + fail_safe = { + "observation": "Error: No valid Python code.", + "reward": 0.0, + "reward_internal": 0.0, + "reward_real": 0.0, + "full_passed": False, + "action": raw, + "problem": problem, + "internal_tests": internal_tests, + } + + code = extract_python_code(raw) + if not code: + fail_safe["observation"] = "Error: Use a ```python ... ``` block or full function." + return fail_safe + if "def " not in code and prompt: + code = prompt.rstrip() + "\n" + code + + if _print_code_attempts: + _print_generated_func_body(code, problem.get("name", "")) + + if not internal_tests: + internal_tests = parse_internal_tests_from_test(test, max_tests=6) + + is_passing_internal, feedback, reward_internal = run_internal_tests( + code, internal_tests, timeout=5 + ) + reward_real = 1.0 if full_evaluate(entry_point, code, test, timeout=10) else 0.0 + reward = reward_internal + reward_real + + return { + "observation": feedback, + "reward": reward, + "reward_internal": reward_internal, + "reward_real": reward_real, + "full_passed": reward_real >= 1.0, + "action": code, + "problem": problem, + "internal_tests": internal_tests, + } + + +class LATSBaseAgent(Agent): + """Base agent: config merged into kwargs; role can be used by subclasses (e.g. 
ReflectionAgent as executor).""" + + def __init__(self, name, *args, **kwargs): + if args and isinstance(args[0], dict): + kwargs = {**args[0], **kwargs} + args = () + self._role = kwargs.pop("role", None) + kwargs.setdefault("model", model_instance) + super().__init__(name, *args, **kwargs) + + +class LATSLLMAgent(LATSBaseAgent): + """Pass-through problem/internal_tests; formatter merges _lats_llm_passthrough to satisfy output_keys.""" + + def step(self, input_dict: dict) -> dict: + fmt._lats_llm_passthrough = { + "problem": input_dict.get("problem"), + "internal_tests": input_dict.get("internal_tests"), + } + return super().step(input_dict) + + def _forward(self, input_dict: dict) -> dict: + out = super()._forward(input_dict) + out["problem"] = input_dict.get("problem") + out["internal_tests"] = input_dict.get("internal_tests") + if "content" not in out or not str(out.get("content", "")).strip(): + out["content"] = ( + out.get("content") + or out.get("action") + or out.get("response") + or out.get("text") + or str(out) + ) + return out + + +class ReflectionAgent(LATSBaseAgent): + """Reflection node. 
When config role=executor, same class acts as Executor (HumanEval runner) for visualizer.""" + + def __init__(self, name, *args, **kwargs): + super().__init__(name, *args, **kwargs) + if getattr(self, "_role", None) == "executor": + self._push_keys = dict(_ENV_PUSH_KEYS) + + @property + def push_keys(self): + if getattr(self, "_role", None) == "executor": + return dict(_ENV_PUSH_KEYS) + return super().push_keys + + def step(self, input_dict: dict) -> dict: + if getattr(self, "_role", None) != "executor": + fmt._lats_reflection_passthrough = { + k: input_dict.get(k) + for k in ( + "action", + "observation", + "reward", + "full_passed", + "problem", + "internal_tests", + ) + if k in input_dict + } + return super().step(input_dict) + + def _forward(self, input_dict: dict) -> dict: + if getattr(self, "_role", None) == "executor": + ctx = None + result = {} + try: + from masfactory.visualizer import get_bridge + bridge = get_bridge() if get_bridge else None + if bridge is not None: + ctx = bridge.node_start(self, input_dict) + except Exception: + pass + try: + result = _run_humaneval_forward(input_dict) + finally: + if ctx is not None: + try: + from masfactory.visualizer import get_bridge as _gb + b = _gb() if _gb else None + if b is not None: + b.node_end(ctx, result, node=self) + except Exception: + pass + return result + out = super()._forward(input_dict) + ref = (out.get("content") or out.get("action") or str(out)).strip() + out = {**out, "reflection": ref} + out["problem"] = input_dict.get("problem") + out["internal_tests"] = input_dict.get("internal_tests") + out["action"] = input_dict.get("action") + out["observation"] = input_dict.get("observation") + out["reward"] = input_dict.get("reward") + out["full_passed"] = input_dict.get("full_passed", False) + return out diff --git a/applications/lats/components/formatters.py b/applications/lats/components/formatters.py new file mode 100644 index 0000000..df4216f --- /dev/null +++ 
"""
Plain-text output formatter for the LATS LLM (code / natural language, not JSON).
Keys from a named module-level dict are merged into the result to satisfy output_keys.
"""
from masfactory.core.message import MessageFormatter

# Filled by agents before step(); the formatter merges these to satisfy output_keys.
_lats_llm_passthrough = {}
_lats_reflection_passthrough = {}


class ContentMessageFormatter(MessageFormatter):
    """Expose the model's raw output under a single key.

    ``merge_global`` names a module-level dict (in this module) whose entries
    are merged into the formatted result so downstream output_keys are present.
    """

    def __init__(self, output_key: str = "content", merge_global: str = ""):
        super().__init__()
        self._output_key = output_key
        self._merge_global = merge_global
        # Serves as both the input and the output formatter of the agent.
        self._is_input_formatter = True
        self._is_output_formatter = True
        self._agent_introducer = (
            f"Your response will be used as the value for the key '{output_key}'. "
            "Provide your response as plain text only (e.g. Python code or a short explanation). Do not wrap in JSON."
        )

    def format(self, message: str) -> dict:
        """Wrap the raw model text in a dict, merging any passthrough entries."""
        if isinstance(message, str) and message:
            text = message.strip()
        else:
            text = ""
        result = {self._output_key: text}
        if self._merge_global:
            extra = globals().get(self._merge_global, {})
            if isinstance(extra, dict) and extra:
                result.update(extra)
        return result

    def dump(self, message: dict) -> str:
        """Serialize back to plain text: the value stored under the output key."""
        return str(message.get(self._output_key, ""))
+""" +import math +from typing import List, Tuple + + +class LATSNode: + def __init__( + self, + solution: str = "", + parent: "LATSNode | None" = None, + context: str = "", + depth: int = 0, + ): + self.solution = solution + self.parent = parent + self.children: List[LATSNode] = [] + self.value = 0.0 + self.visits = 0 + self.context = context + self.depth = depth + self.reflection = "" + self.test_feedback = "" + + def uct(self, exploration_weight: float = 1.0) -> float: + if self.visits == 0: + return self.value + p = self.parent + p_visits = p.visits if p else 1 + return (self.value / self.visits) + exploration_weight * math.sqrt( + math.log(max(1, p_visits)) / self.visits + ) + + def best_child(self) -> "LATSNode | None": + if not self.children: + return None + return max(self.children, key=lambda c: c.uct()) + + def best_child_value(self) -> "LATSNode | None": + if not self.children: + return None + return max(self.children, key=lambda c: c.value) + + def update(self, reward: float): + self.visits += 1 + self.value += reward + + +def gather_context_from_tree( + node: LATSNode, +) -> Tuple[List[str], List[str], List[str]]: + """Collect (solution, test_feedback, reflection) from current node to root for reflexion context.""" + impls, feedbacks, reflections = [], [], [] + while node: + if node.solution: + impls.append(node.solution) + feedbacks.append(node.test_feedback or "") + reflections.append(node.reflection or "") + node = node.parent + return impls[::-1], feedbacks[::-1], reflections[::-1] + + +class TreeManager: + def __init__(self, problem: dict, root: LATSNode): + self.problem = problem + self.root = root + self.current_node = root + self._max_iters = 8 + + def selection(self) -> LATSNode: + node = self.root + while node.children: + child = node.best_child() + if child is None: + break + node = child + return node + + def backprop(self, node: LATSNode, reward: float): + temp = node + while temp: + temp.update(reward) + temp = temp.parent diff --git 
"""
HumanEval executor: internal tests + full evaluate. Uses fresh globals per run to avoid cross-problem pollution.
"""
import re
from typing import List, Tuple

from .timeout_utils import function_with_timeout


def _fresh_globals():
    """Fresh namespace for each evaluation to avoid function name pollution between problems."""
    import builtins
    # Real builtins (not a dict copy) so generated code can use any builtin;
    # typing names are pre-imported because HumanEval prompts annotate with them.
    g = {"__builtins__": builtins}
    exec("from typing import *", g)
    return g


def run_internal_tests(
    func: str, tests: List[str], timeout: int = 5
) -> Tuple[bool, str, float]:
    """Run internal tests; return (all_passed, feedback_string, reward_internal = passed/total).

    Each test is executed in its own fresh namespace together with *func*,
    under a per-test timeout.  Failures are re-run via get_test_output to
    capture the actual value for the feedback message shown to the LLM.
    """
    success_tests = []
    failed_tests = []
    for test in tests:
        g = _fresh_globals()
        try:
            function_with_timeout(exec, (f"{func}\n{test}", g), timeout)
            success_tests.append(test)
        except Exception:
            # Broad catch is intentional: any failure mode (assertion error,
            # syntax error, timeout) counts as a failed test, never a crash.
            out = get_test_output(func, test, timeout)
            failed_tests.append(f"{test} # output: {out}")
    # NOTE(review): "Tested passed:" matches the upstream LATS feedback wording;
    # confirm downstream prompts before "fixing" the apparent typo.
    feedback = "Tested passed:"
    for t in success_tests:
        feedback += f"\n{t}"
    feedback += "\n\nTests failed:"
    for t in failed_tests:
        feedback += f"\n{t}"
    n = len(tests)
    # Fraction of internal tests passed; 0.0 when there are no tests at all.
    reward_internal = (len(success_tests) / n) if n else 0.0
    return (len(failed_tests) == 0, feedback, reward_internal)


def get_test_output(func: str, assert_statement: str, timeout: int) -> str:
    """Execute single assert and return actual output (for failure message).

    Strips the leading ``assert`` and any trailing ``# comment``, then
    evaluates only the left-hand side of an ``==`` comparison (the call),
    so the returned string is the value the implementation actually produced.
    """
    g = _fresh_globals()
    try:
        exec(func, g)
        s = re.sub(r"^assert\s+", "", assert_statement.strip()).split(" # ")[0].strip()
        if " == " in s:
            call_str = s.split(" == ")[0].strip()
        else:
            call_str = s
        return str(function_with_timeout(eval, (call_str, g), timeout))
    except TimeoutError:
        return "TIMEOUT"
    except Exception as e:
        # Any other failure is reported as its message text.
        return str(e)


def full_evaluate(entry_point: str, func: str, test: str, timeout: int = 10) -> bool:
    """Official HumanEval full evaluation: func + test + check(entry_point). Uses fresh namespace.

    Returns True only when the complete official test suite runs without
    raising; any exception (including timeout) means failure.
    """
    code = f"{func}\n\n{test}\n\ncheck({entry_point})"
    g = _fresh_globals()
    try:
        function_with_timeout(exec, (code, g), timeout)
        return True
    except Exception:
        return False


def verify_evaluation(problem: dict) -> None:
    """Startup check: wrong implementation must be rejected, else evaluation logic is buggy.

    Builds a trivially wrong implementation for *problem* and aborts the whole
    process (sys.exit(1)) if full_evaluate accepts it, so a broken harness can
    never silently inflate Pass@1.
    """
    import sys
    entry_point = problem.get("entry_point", "")
    test = problem.get("test", "")
    if not entry_point or not test:
        return
    wrong_impl = f"def {entry_point}(*args, **kwargs):\n return 0"
    if full_evaluate(entry_point, wrong_impl, test, timeout=5):
        print("ERROR: Evaluation bug: a wrong solution was marked as PASSED. Fix full_evaluate.")
        sys.exit(1)
    print("Evaluation verification passed (wrong solution correctly rejected).")
+""" +import json +import gzip +import random +import re +from typing import List + + +def load_humaneval_jsonl(path: str) -> List[dict]: + """Load HumanEval items from .jsonl or .jsonl.gz.""" + items = [] + if path.endswith(".gz"): + with gzip.open(path, "rt", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + items.append(json.loads(line)) + else: + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + items.append(json.loads(line)) + return items + + +def parse_internal_tests_from_test(test_str: str, max_tests: int = 6) -> List[str]: + """Parse assert lines from HumanEval test string as internal tests (aligned with LATS number_of_tests).""" + asserts = [] + for line in test_str.splitlines(): + line = line.strip() + if line.startswith("assert candidate(") or (line.startswith("assert ") and "candidate(" in line): + asserts.append(line) + if not asserts: + return asserts + if len(asserts) > max_tests: + asserts = random.sample(asserts, max_tests) + return asserts + + +def extract_python_code(raw: str) -> str: + """Extract Python code block or first complete function from LLM output.""" + raw = (raw or "").strip() + code_match = re.search(r"```python\s*(.*?)\s*```", raw, re.DOTALL) + if code_match: + return code_match.group(1).strip() + if "def " in raw: + return raw + return "" diff --git a/applications/lats/humaneval/timeout_utils.py b/applications/lats/humaneval/timeout_utils.py new file mode 100644 index 0000000..4d8e0a9 --- /dev/null +++ b/applications/lats/humaneval/timeout_utils.py @@ -0,0 +1,31 @@ +""" +Timeout wrapper for test execution (no dependency on LATS repo). 
+""" +from threading import Thread + + +class _PropagatingThread(Thread): + def run(self): + self.exc = None + try: + self.ret = self._target(*self._args, **self._kwargs) + except BaseException as e: + self.exc = e + + def join(self, timeout=None): + super().join(timeout) + if self.exc: + raise self.exc + return self.ret + + +def function_with_timeout(func, args, timeout): + r = [] + def w(): + r.append(func(*args)) + t = _PropagatingThread(target=w) + t.start() + t.join(timeout) + if t.is_alive(): + raise TimeoutError() + return r[0] diff --git a/applications/lats/main.py b/applications/lats/main.py new file mode 100644 index 0000000..58ef03a --- /dev/null +++ b/applications/lats/main.py @@ -0,0 +1,176 @@ +""" +LATS (Language Agent Tree Search) – HumanEval on MASFactory. +Standard entry point: parse args, load dataset, build graph, run problems, tee to log. +Reference: https://arxiv.org/abs/2310.04406 and LanguageAgentTreeSearch-main/programming. +""" +import os +import sys +import json +import argparse + +# Ensure repo root (parent of lats/) is on path so "lats" package resolves when run from lats/ or repo root +_APP_ROOT = os.path.dirname(os.path.abspath(__file__)) +_REPO_ROOT = os.path.dirname(_APP_ROOT) +if _REPO_ROOT not in sys.path: + sys.path.insert(0, _REPO_ROOT) + +from lats.humaneval.load import load_humaneval_jsonl, parse_internal_tests_from_test +from lats.humaneval.executor import verify_evaluation +from lats.workflows.graph import ( + build_graph, + run_one_problem, + LATS_MAX_ITERS, + NUMBER_OF_TESTS, +) +from lats.utils.tee import tee, set_log_file, get_log_file +from lats.components.agents import set_print_code_attempts + + +def _default_dataset_path() -> str: + p = os.path.join(_APP_ROOT, "assets", "config", "defaults.json") + if os.path.isfile(p): + try: + with open(p, "r", encoding="utf-8") as f: + d = json.load(f) + if d.get("dataset_path"): + return d["dataset_path"] + except Exception: + pass + return "" + + +def main(): + parser = 
argparse.ArgumentParser( + description="LATS (Language Agent Tree Search) on HumanEval via MASFactory." + ) + parser.add_argument( + "--dataset", + default="", + help="HumanEval path: .jsonl or .jsonl.gz", + ) + parser.add_argument("--max_iters", type=int, default=LATS_MAX_ITERS) + parser.add_argument("--number_of_tests", type=int, default=NUMBER_OF_TESTS) + parser.add_argument("--limit", type=int, default=0, help="Limit number of problems (0 = all)") + parser.add_argument( + "--print-code", + action="store_true", + help="Print each generated attempt (default: final solution only)", + ) + parser.add_argument( + "--log", + type=str, + default="", + help="Append same output to file (e.g. logs/lats.log)", + ) + args = parser.parse_args() + + set_print_code_attempts(args.print_code) + log_file = None + if args.log: + try: + os.makedirs(os.path.dirname(args.log) or ".", exist_ok=True) + log_file = open(args.log, "a", encoding="utf-8") + set_log_file(log_file) + except Exception as e: + print(f"Warning: could not open log file {args.log}: {e}", flush=True) + + max_iters = args.max_iters + number_of_tests = args.number_of_tests + dataset_path = args.dataset or _default_dataset_path() + if not dataset_path: + dataset_path = os.path.join( + os.path.dirname(_APP_ROOT), + "1", + "LanguageAgentTreeSearch-main", + "programming", + "benchmarks", + "humaneval-py.jsonl", + ) + + dataset = load_humaneval_jsonl(dataset_path) + if args.limit > 0: + dataset = dataset[: args.limit] + + tee(f"Dataset: {dataset_path}", log_file) + tee(f"Loaded {len(dataset)} problems. 
max_iters={max_iters}, number_of_tests={number_of_tests}", log_file) + model_name = os.environ.get("LATS_MODEL", "gpt-4") + tee(f"Model: {model_name}", log_file) + + if dataset: + verify_evaluation({ + "entry_point": dataset[0].get("entry_point", ""), + "test": dataset[0].get("test", ""), + "prompt": dataset[0].get("prompt", ""), + }) + + g = build_graph() + + _vis_port = os.environ.get("MASFACTORY_VISUALIZER_PORT", "") + _vis_host = os.environ.get("MASFACTORY_VISUALIZER_HOST", "127.0.0.1") + try: + import masfactory.visualizer as _vis + _connected_bridge = _vis.connect_bridge(timeout_s=5.0) + if _connected_bridge is not None: + _connected_bridge.attach_graph(g) + _vis.get_bridge = lambda: _connected_bridge + print("Visualizer connected: runtime view enabled.") + elif _vis_port: + print("Visualizer: connection failed (runtime view disabled).") + print(f" Tried {_vis_host}:{_vis_port} — ensure MASFactory extension is open and listening on this port.") + except Exception as e: + if _vis_port: + print("Visualizer connect error:", e) + + api_key = os.environ.get("OPENAI_API_KEY", "").strip() + if not api_key: + print("Error: OPENAI_API_KEY is not set. Set it first, e.g.:") + print(" set OPENAI_API_KEY=sk-your-key (Windows)") + print(" export OPENAI_API_KEY=sk-your-key (Linux/Mac)") + print("Get a key: https://platform.openai.com/account/api-keys") + sys.exit(1) + print("Using OPENAI_API_KEY from env. 
(401 = invalid key; check/regenerate at https://platform.openai.com/account/api-keys)") + if not _vis_port: + print("Tip: set MASFACTORY_VISUALIZER_PORT to enable the visualizer runtime view.") + + num_success = 0 + for idx, item in enumerate(dataset): + problem = { + "name": item.get("name", item.get("task_id", "")), + "prompt": item.get("prompt", ""), + "entry_point": item.get("entry_point", ""), + "test": item.get("test", ""), + } + internal_tests = parse_internal_tests_from_test( + problem["test"], max_tests=number_of_tests + ) + try: + best_code, passed = run_one_problem( + problem, g, internal_tests, max_iters, number_of_tests + ) + except Exception as e: + best_code, passed = "", False + err_msg = str(e).split("\n")[0][:80] + tee(f"Warning: problem {idx+1} failed ({err_msg}), treating as not passed.", log_file) + if passed: + num_success += 1 + acc = round(num_success / (idx + 1), 2) + tee(f"completed {idx+1}/{len(dataset)}: acc = {acc}", log_file) + pname = problem.get("name", item.get("task_id", f"problem_{idx+1}")) + tee(f"\n--------------------- FINAL SOLUTION [{pname}] passed={bool(passed)} ---------------------", log_file) + tee(best_code if best_code else "(none)", log_file) + tee("------------------------------------------\n", log_file) + item["solution"] = best_code + item["is_solved"] = passed + item["acc"] = acc + + tee(f"Done. 
Pass@1 acc = {num_success}/{len(dataset)} = {round(num_success/len(dataset), 2)}", log_file) + if log_file is not None: + try: + log_file.close() + except Exception: + pass + set_log_file(None) + + +if __name__ == "__main__": + main() diff --git a/applications/lats/utils/__init__.py b/applications/lats/utils/__init__.py new file mode 100644 index 0000000..35cefe0 --- /dev/null +++ b/applications/lats/utils/__init__.py @@ -0,0 +1 @@ +# LATS application utilities diff --git a/applications/lats/utils/tee.py b/applications/lats/utils/tee.py new file mode 100644 index 0000000..fc64aab --- /dev/null +++ b/applications/lats/utils/tee.py @@ -0,0 +1,29 @@ +""" +Tee output to terminal and optional log file. +Used by main to duplicate progress/solutions to --log file. +""" +import sys +from typing import Optional, TextIO + +_lats_log_file: Optional[TextIO] = None + + +def set_log_file(f: Optional[TextIO]) -> None: + global _lats_log_file + _lats_log_file = f + + +def get_log_file() -> Optional[TextIO]: + return _lats_log_file + + +def tee(s: str, log_file: Optional[TextIO] = None) -> None: + """Print to stdout and append to log_file (or global _lats_log_file) if set.""" + print(s, flush=True) + f = log_file if log_file is not None else _lats_log_file + if f is not None: + try: + f.write(s.rstrip() + "\n") + f.flush() + except Exception: + pass diff --git a/applications/lats/workflows/__init__.py b/applications/lats/workflows/__init__.py new file mode 100644 index 0000000..0505e1d --- /dev/null +++ b/applications/lats/workflows/__init__.py @@ -0,0 +1 @@ +# LATS workflow (graph + controller) diff --git a/applications/lats/workflows/controller.py b/applications/lats/workflows/controller.py new file mode 100644 index 0000000..430bb30 --- /dev/null +++ b/applications/lats/workflows/controller.py @@ -0,0 +1,103 @@ +""" +LATS controller: MCTS selection, expand, backprop, terminate. Uses tree set by graph before each invoke. 
+""" +from typing import Any, Optional + +from ..components.tree import LATSNode, TreeManager, gather_context_from_tree + +_lats_tree: Optional[TreeManager] = None + + +def set_lats_tree(tm: TreeManager | None) -> None: + global _lats_tree + _lats_tree = tm + + +def lats_controller_logic(message: dict, _attrs: Any) -> bool: + """Return True to terminate loop; otherwise set reflexion_prompt and return False.""" + global _lats_tree + if _lats_tree is None: + message["final_code"] = "" + message["final_passed"] = False + return True + + action = message.get("action") or message.get("(not set yet)") + if action == "(not set yet)": + action = "" + observation = message.get("observation", "") + reward = message.get("reward", 0.0) + try: + reward = float(reward) if str(reward) != "(not set yet)" else 0.0 + except Exception: + reward = 0.0 + full_passed = message.get("full_passed", False) + if isinstance(full_passed, str) and "(not set yet)" in str(full_passed): + full_passed = False + full_passed = bool(full_passed) + reflection = message.get("reflection", "") + + root = _lats_tree.root + # First round: init root from first LLM output; else add child and backprop + if action and action not in ("(not set yet)", "Empty", "Invalid_Instruction"): + if not root.solution and root.visits == 0: + root.solution = action + root.test_feedback = observation + root.reflection = reflection + root.visits = 1 + root.value = reward + else: + selected = _lats_tree.current_node + child = LATSNode( + solution=action, parent=selected, depth=selected.depth + 1 + ) + child.test_feedback = observation + child.reflection = reflection + selected.children.append(child) + _lats_tree.backprop(child, reward) + + if full_passed or root.visits >= _lats_tree._max_iters: + best_node = root.best_child_value() if root.children else root + best_code = (best_node.solution if best_node else "") or action or "" + if full_passed and action: + best_code = action + message["final_code"] = best_code + 
message["final_passed"] = full_passed + return True + + selected = _lats_tree.selection() + _lats_tree.current_node = selected + path_impls, path_feedbacks, path_reflections = gather_context_from_tree( + selected + ) + + # Build reflexion prompt (aligned with generator_utils.generate_with_accumulated_context) + if not path_impls: + reflexion_prompt = ( + "You are an AI that only responds with Python code. Write your full implementation (restate the function signature). " + "Use a Python code block: ```python ... ```\n\n" + + (_lats_tree.problem.get("prompt") or "") + ) + else: + parts = [] + for i, (impl, fb, ref) in enumerate( + zip(path_impls, path_feedbacks, path_reflections) + ): + impl_short = impl[:2000] + "..." if len(impl) > 2000 else impl + parts.append( + f"[previous impl {i+1}]:\n```python\n{impl_short}\n```\n" + f"[unit test results {i+1}]:\n{fb}\n[reflection {i+1}]:\n{ref}" + ) + reflexion_prompt = ( + "You are an AI Python assistant. You will be given previous implementation(s), unit test results, and self-reflections. " + "Write your full improved implementation (restate the function signature). Use only a ```python ... ``` block.\n\n" + + "\n\n".join(parts) + + "\n\n[improved impl]:\n" + + (_lats_tree.problem.get("prompt") or "") + ) + + message["reflexion_prompt"] = reflexion_prompt + message["action"] = "(not set yet)" + message["observation"] = "(not set yet)" + message["reward"] = 0.0 + message["reflection"] = "" + return False diff --git a/applications/lats/workflows/graph.py b/applications/lats/workflows/graph.py new file mode 100644 index 0000000..00eec90 --- /dev/null +++ b/applications/lats/workflows/graph.py @@ -0,0 +1,149 @@ +""" +LATS workflow: build RootGraph + Loop (LLM -> Executor -> Reflection -> controller), run_one_problem. 
+""" +import os +from typing import List, Tuple + +from masfactory import RootGraph, NodeTemplate, Loop +from masfactory.core.message import ParagraphMessageFormatter + +from ..components.formatters import ContentMessageFormatter +from ..components.agents import ( + LATSLLMAgent, + ReflectionAgent, + set_print_code_attempts, +) +from ..components.tree import LATSNode, TreeManager +from .controller import lats_controller_logic, set_lats_tree +from ..humaneval.executor import full_evaluate, verify_evaluation + +# Paper/source run_lats_gpt4.sh: max_iters=8, number_of_tests=2 +LATS_MAX_ITERS = int(os.environ.get("LATS_MAX_ITERS", "8")) +NUMBER_OF_TESTS = int(os.environ.get("NUMBER_OF_TESTS", "2")) + +loop_nodes = [ + ( + "LLM_Agent", + LATSLLMAgent, + { + "instructions": "You output ONLY Python code in a ```python ... ``` block. No explanations. Restate the function signature in your implementation.", + "prompt_template": "{reflexion_prompt}", + "formatters": [ + ParagraphMessageFormatter(), + ContentMessageFormatter("content", merge_global="_lats_llm_passthrough"), + ], + }, + ), + ( + "Executor", + ReflectionAgent, + { + "role": "executor", + "instructions": "Run HumanEval internal tests and full evaluation.", + "pull_keys": {"problem": "problem", "internal_tests": "internal_tests", "content": "content"}, + "push_keys": {"observation": "observation", "reward": "reward", "action": "action", "full_passed": "full_passed"}, + }, + ), + ( + "Reflection", + ReflectionAgent, + { + "instructions": "You are a Python programming assistant. Given a function implementation and unit test results, write a few sentences explaining why the implementation is wrong. 
Do NOT output code, only the explanation.", + "prompt_template": "[function impl]:\n```python\n{action}\n```\n\n[unit test results]:\n{observation}\n\n[self-reflection]:", + "formatters": [ + ParagraphMessageFormatter(), + ContentMessageFormatter("reflection", merge_global="_lats_reflection_passthrough"), + ], + }, + ), +] + +LATSTemplate = NodeTemplate( + Loop, + max_iterations=LATS_MAX_ITERS, + terminate_condition_function=lats_controller_logic, + nodes=loop_nodes, + edges=[ + ("controller", "LLM_Agent", {"reflexion_prompt": "reflexion_prompt", "problem": "problem", "internal_tests": "internal_tests"}), + ("LLM_Agent", "Executor", {"content": "content", "problem": "problem", "internal_tests": "internal_tests"}), + ("Executor", "Reflection", {"action": "action", "observation": "observation", "reward": "reward", "full_passed": "full_passed", "problem": "problem", "internal_tests": "internal_tests"}), + ("Reflection", "controller", {"action": "action", "observation": "observation", "reward": "reward", "full_passed": "full_passed", "reflection": "reflection", "problem": "problem", "internal_tests": "internal_tests"}), + ], + pull_keys={"problem": "problem", "internal_tests": "internal_tests"}, + push_keys={"final_code": "final_code", "final_passed": "final_passed"}, +) + + +def build_graph() -> RootGraph: + """Build LATS RootGraph with single LATS node (Loop).""" + g = RootGraph( + name="LATS_Runner", + nodes=[("LATS", LATSTemplate)], + edges=[ + ("entry", "LATS", {"problem": "problem", "internal_tests": "internal_tests"}), + ("LATS", "exit", {"final_code": "final_code", "final_passed": "final_passed"}), + ], + ) + g.build() + # Wire Executor push_keys for visualizer + try: + lats_loop = getattr(g, "_nodes", {}).get("LATS") + if lats_loop is not None and hasattr(lats_loop, "_nodes"): + env_node = lats_loop._nodes.get("Executor") + if env_node is not None and hasattr(env_node, "set_push_keys"): + env_node.set_push_keys({ + "observation": "observation", + "reward": 
"reward", + "action": "action", + "full_passed": "full_passed", + }) + except Exception: + pass + return g + + +def run_one_problem( + problem: dict, + graph: RootGraph, + internal_tests: List[str], + max_iters: int, + number_of_tests: int, +) -> Tuple[str, bool]: + """Run LATS for one problem; return (best_solution, passed).""" + set_lats_tree(None) + prompt = problem.get("prompt", "") + simple_prompt = ( + "You output ONLY Python code in a ```python ... ``` block. No explanations. " + "Write your full implementation (restate the function signature).\n\n" + prompt + ) + root = LATSNode(solution="", context=prompt) + tm = TreeManager(problem, root) + tm._max_iters = max_iters + set_lats_tree(tm) + + initial_input = { + "problem": problem, + "internal_tests": internal_tests, + "reflexion_prompt": simple_prompt, + } + result, _ = graph.invoke(initial_input) + final_code = result.get("final_code", "") or "" + final_passed = result.get("final_passed", False) + if isinstance(final_passed, str) and "(not set yet)" in str(final_passed): + final_passed = False + final_passed = bool(final_passed) + if not final_code and tm and tm.root: + best_node = tm.root.best_child_value() if tm.root.children else tm.root + if best_node and getattr(best_node, "solution", None): + final_code = best_node.solution + elif getattr(tm.root, "solution", None): + final_code = tm.root.solution + if final_code and not final_passed: + final_passed = full_evaluate( + tm.problem.get("entry_point", ""), + final_code, + tm.problem.get("test", ""), + timeout=10, + ) + set_lats_tree(None) + return final_code, final_passed diff --git a/masfactory-visualizer/package-lock.json b/masfactory-visualizer/package-lock.json index b0dbd0f..0ca87b1 100644 --- a/masfactory-visualizer/package-lock.json +++ b/masfactory-visualizer/package-lock.json @@ -33,9 +33,9 @@ "license": "MIT" }, "node_modules/@eslint-community/eslint-utils": { - "version": "4.9.0", - "resolved": 
"https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.9.0.tgz", - "integrity": "sha512-ayVFHdtZ+hsq1t2Dy24wCmGXGe4q9Gu3smhLYALJrr473ZH27MsnSL+LKUlimp4BWJqMDMLmPpx/Q9R3OAlL4g==", + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.9.1.tgz", + "integrity": "sha512-phrYmNiYppR7znFEdqgfWHXR6NCkZEK7hwWDHZUjit/2/U0r6XvkDl0SYnoM51Hq7FhCGdLDT6zxCCOY1hexsQ==", "dev": true, "license": "MIT", "dependencies": { @@ -101,22 +101,22 @@ } }, "node_modules/@eslint/config-helpers": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.1.tgz", - "integrity": "sha512-csZAzkNhsgwb0I/UAV6/RGFTbiakPCf0ZrGmrIxQpYvGZ00PhTkSnyKNolphgIvmnJeGw6rcGVEXfTzUnFuEvw==", + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.2.tgz", + "integrity": "sha512-gBrxN88gOIf3R7ja5K9slwNayVcZgK6SOUORm2uBzTeIEfeVaIhOpCtTox3P6R7o2jLFwLFTLnC7kU/RGcYEgw==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@eslint/core": "^0.16.0" + "@eslint/core": "^0.17.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, "node_modules/@eslint/core": { - "version": "0.16.0", - "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.16.0.tgz", - "integrity": "sha512-nmC8/totwobIiFcGkDza3GIKfAw1+hLiYVrh3I1nIomQ8PEr5cxg34jnkmGawul/ep52wGRAcyeDCNtWKSOj4Q==", + "version": "0.17.0", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.17.0.tgz", + "integrity": "sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ==", "dev": true, "license": "Apache-2.0", "dependencies": { @@ -127,9 +127,9 @@ } }, "node_modules/@eslint/eslintrc": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.1.tgz", - "integrity": "sha512-gtF186CXhIl1p4pJNGZw8Yc6RlshoePRvE0X91oPGb3vZ8pM3qOS9W9NGPat9LziaBV7XrJWGylNQXkGcnM3IQ==", + "version": "3.3.3", 
+ "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.3.tgz", + "integrity": "sha512-Kr+LPIUVKz2qkx1HAMH8q1q6azbqBAsXJUxBl/ODDuVPX45Z9DfwB8tPjTi6nNZ8BuM3nbJxC5zCAg5elnBUTQ==", "dev": true, "license": "MIT", "dependencies": { @@ -139,7 +139,7 @@ "globals": "^14.0.0", "ignore": "^5.2.0", "import-fresh": "^3.2.1", - "js-yaml": "^4.1.0", + "js-yaml": "^4.1.1", "minimatch": "^3.1.2", "strip-json-comments": "^3.1.1" }, @@ -185,9 +185,9 @@ } }, "node_modules/@eslint/js": { - "version": "9.38.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.38.0.tgz", - "integrity": "sha512-UZ1VpFvXf9J06YG9xQBdnzU+kthors6KjhMAl6f4gH4usHyh31rUf2DLGInT8RFYIReYXNSydgPY0V2LuWgl7A==", + "version": "9.39.3", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.3.tgz", + "integrity": "sha512-1B1VkCq6FuUNlQvlBYb+1jDu/gV297TIs/OeiaSR9l1H27SVW55ONE1e1Vp16NqP683+xEGzxYtv4XCiDPaQiw==", "dev": true, "license": "MIT", "engines": { @@ -208,13 +208,13 @@ } }, "node_modules/@eslint/plugin-kit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.0.tgz", - "integrity": "sha512-sB5uyeq+dwCWyPi31B2gQlVlo+j5brPlWx4yZBrEaRo/nhdDE8Xke1gsGgtiBdaBTxuTkceLVuVt/pclrasb0A==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.1.tgz", + "integrity": "sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@eslint/core": "^0.16.0", + "@eslint/core": "^0.17.0", "levn": "^0.4.1" }, "engines": { @@ -329,44 +329,6 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/@pkgjs/parseargs": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", @@ -407,9 +369,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "22.18.12", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.18.12.tgz", - "integrity": "sha512-BICHQ67iqxQGFSzfCFTT7MRQ5XcBjG5aeKh5Ok38UBbPe5fxTyE+aHFxwVrGyr8GNlqFMLKD1D3P2K/1ks8tog==", + "version": "22.19.11", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.11.tgz", + "integrity": "sha512-BH7YwL6rA93ReqeQS1c4bsPpcfOmJasG+Fkr6Y59q83f9M1WcBRHR2vM+P9eOisYRcN3ujQoiZY8uk5W+1WL8w==", "dev": true, "license": "MIT", "dependencies": { @@ -417,28 +379,27 @@ } }, "node_modules/@types/vscode": { - "version": "1.75.1", - "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.75.1.tgz", - "integrity": "sha512-emg7wdsTFzdi+elvoyoA+Q8keEautdQHyY5LNmHVM4PTpY8JgOTVADrGVyXGepJ6dVW2OS5/xnLUWh+nZxvdiA==", + "version": "1.109.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.109.0.tgz", + "integrity": 
"sha512-0Pf95rnwEIwDbmXGC08r0B4TQhAbsHQ5UyTIgVgoieDe4cOnf92usuR5dEczb6bTKEp7ziZH4TV1TRGPPCExtw==", "dev": true, "license": "MIT" }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.46.2.tgz", - "integrity": "sha512-ZGBMToy857/NIPaaCucIUQgqueOiq7HeAKkhlvqVV4lm089zUFW6ikRySx2v+cAhKeUCPuWVHeimyk6Dw1iY3w==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.56.0.tgz", + "integrity": "sha512-lRyPDLzNCuae71A3t9NEINBiTn7swyOhvUj3MyUOxb8x6g6vPEFoOU+ZRmGMusNC3X3YMhqMIX7i8ShqhT74Pw==", "dev": true, "license": "MIT", "dependencies": { - "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "8.46.2", - "@typescript-eslint/type-utils": "8.46.2", - "@typescript-eslint/utils": "8.46.2", - "@typescript-eslint/visitor-keys": "8.46.2", - "graphemer": "^1.4.0", - "ignore": "^7.0.0", + "@eslint-community/regexpp": "^4.12.2", + "@typescript-eslint/scope-manager": "8.56.0", + "@typescript-eslint/type-utils": "8.56.0", + "@typescript-eslint/utils": "8.56.0", + "@typescript-eslint/visitor-keys": "8.56.0", + "ignore": "^7.0.5", "natural-compare": "^1.4.0", - "ts-api-utils": "^2.1.0" + "ts-api-utils": "^2.4.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -448,24 +409,23 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "@typescript-eslint/parser": "^8.46.2", - "eslint": "^8.57.0 || ^9.0.0", + "@typescript-eslint/parser": "^8.56.0", + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/parser": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.46.2.tgz", - "integrity": "sha512-BnOroVl1SgrPLywqxyqdJ4l3S2MsKVLDVxZvjI1Eoe8ev2r3kGDo+PcMihNmDE+6/KjkTubSJnmqGZZjQSBq/g==", + "version": "8.56.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.56.0.tgz", + "integrity": "sha512-IgSWvLobTDOjnaxAfDTIHaECbkNlAlKv2j5SjpB2v7QHKv1FIfjwMy8FsDbVfDX/KjmCmYICcw7uGaXLhtsLNg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { - "@typescript-eslint/scope-manager": "8.46.2", - "@typescript-eslint/types": "8.46.2", - "@typescript-eslint/typescript-estree": "8.46.2", - "@typescript-eslint/visitor-keys": "8.46.2", - "debug": "^4.3.4" + "@typescript-eslint/scope-manager": "8.56.0", + "@typescript-eslint/types": "8.56.0", + "@typescript-eslint/typescript-estree": "8.56.0", + "@typescript-eslint/visitor-keys": "8.56.0", + "debug": "^4.4.3" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -475,20 +435,20 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/project-service": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.46.2.tgz", - "integrity": "sha512-PULOLZ9iqwI7hXcmL4fVfIsBi6AN9YxRc0frbvmg8f+4hQAjQ5GYNKK0DIArNo+rOKmR/iBYwkpBmnIwin4wBg==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.56.0.tgz", + "integrity": "sha512-M3rnyL1vIQOMeWxTWIW096/TtVP+8W3p/XnaFflhmcFp+U4zlxUxWj4XwNs6HbDeTtN4yun0GNTTDBw/SvufKg==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.46.2", - "@typescript-eslint/types": "^8.46.2", - "debug": "^4.3.4" + "@typescript-eslint/tsconfig-utils": "^8.56.0", + "@typescript-eslint/types": "^8.56.0", + "debug": "^4.4.3" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -502,14 +462,14 @@ } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "8.46.2", - "resolved": 
"https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.46.2.tgz", - "integrity": "sha512-LF4b/NmGvdWEHD2H4MsHD8ny6JpiVNDzrSZr3CsckEgCbAGZbYM4Cqxvi9L+WqDMT+51Ozy7lt2M+d0JLEuBqA==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.56.0.tgz", + "integrity": "sha512-7UiO/XwMHquH+ZzfVCfUNkIXlp/yQjjnlYUyYz7pfvlK3/EyyN6BK+emDmGNyQLBtLGaYrTAI6KOw8tFucWL2w==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.46.2", - "@typescript-eslint/visitor-keys": "8.46.2" + "@typescript-eslint/types": "8.56.0", + "@typescript-eslint/visitor-keys": "8.56.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -520,9 +480,9 @@ } }, "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.46.2.tgz", - "integrity": "sha512-a7QH6fw4S57+F5y2FIxxSDyi5M4UfGF+Jl1bCGd7+L4KsaUY80GsiF/t0UoRFDHAguKlBaACWJRmdrc6Xfkkag==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.56.0.tgz", + "integrity": "sha512-bSJoIIt4o3lKXD3xmDh9chZcjCz5Lk8xS7Rxn+6l5/pKrDpkCwtQNQQwZ2qRPk7TkUYhrq3WPIHXOXlbXP0itg==", "dev": true, "license": "MIT", "engines": { @@ -537,17 +497,17 @@ } }, "node_modules/@typescript-eslint/type-utils": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.46.2.tgz", - "integrity": "sha512-HbPM4LbaAAt/DjxXaG9yiS9brOOz6fabal4uvUmaUYe6l3K1phQDMQKBRUrr06BQkxkvIZVVHttqiybM9nJsLA==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.56.0.tgz", + "integrity": "sha512-qX2L3HWOU2nuDs6GzglBeuFXviDODreS58tLY/BALPC7iu3Fa+J7EOTwnX9PdNBxUI7Uh0ntP0YWGnxCkXzmfA==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.46.2", - 
"@typescript-eslint/typescript-estree": "8.46.2", - "@typescript-eslint/utils": "8.46.2", - "debug": "^4.3.4", - "ts-api-utils": "^2.1.0" + "@typescript-eslint/types": "8.56.0", + "@typescript-eslint/typescript-estree": "8.56.0", + "@typescript-eslint/utils": "8.56.0", + "debug": "^4.4.3", + "ts-api-utils": "^2.4.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -557,14 +517,14 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/types": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.46.2.tgz", - "integrity": "sha512-lNCWCbq7rpg7qDsQrd3D6NyWYu+gkTENkG5IKYhUIcxSb59SQC/hEQ+MrG4sTgBVghTonNWq42bA/d4yYumldQ==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.56.0.tgz", + "integrity": "sha512-DBsLPs3GsWhX5HylbP9HNG15U0bnwut55Lx12bHB9MpXxQ+R5GC8MwQe+N1UFXxAeQDvEsEDY6ZYwX03K7Z6HQ==", "dev": true, "license": "MIT", "engines": { @@ -576,22 +536,21 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.46.2.tgz", - "integrity": "sha512-f7rW7LJ2b7Uh2EiQ+7sza6RDZnajbNbemn54Ob6fRwQbgcIn+GWfyuHDHRYgRoZu1P4AayVScrRW+YfbTvPQoQ==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.56.0.tgz", + "integrity": "sha512-ex1nTUMWrseMltXUHmR2GAQ4d+WjkZCT4f+4bVsps8QEdh0vlBsaCokKTPlnqBFqqGaxilDNJG7b8dolW2m43Q==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/project-service": "8.46.2", - "@typescript-eslint/tsconfig-utils": "8.46.2", - "@typescript-eslint/types": "8.46.2", - "@typescript-eslint/visitor-keys": "8.46.2", - "debug": "^4.3.4", - "fast-glob": "^3.3.2", - 
"is-glob": "^4.0.3", - "minimatch": "^9.0.4", - "semver": "^7.6.0", - "ts-api-utils": "^2.1.0" + "@typescript-eslint/project-service": "8.56.0", + "@typescript-eslint/tsconfig-utils": "8.56.0", + "@typescript-eslint/types": "8.56.0", + "@typescript-eslint/visitor-keys": "8.56.0", + "debug": "^4.4.3", + "minimatch": "^9.0.5", + "semver": "^7.7.3", + "tinyglobby": "^0.2.15", + "ts-api-utils": "^2.4.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -605,16 +564,16 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.46.2.tgz", - "integrity": "sha512-sExxzucx0Tud5tE0XqR0lT0psBQvEpnpiul9XbGUB1QwpWJJAps1O/Z7hJxLGiZLBKMCutjTzDgmd1muEhBnVg==", + "version": "8.56.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.56.0.tgz", + "integrity": "sha512-RZ3Qsmi2nFGsS+n+kjLAYDPVlrzf7UhTffrDIKr+h2yzAlYP/y5ZulU0yeDEPItos2Ph46JAL5P/On3pe7kDIQ==", "dev": true, "license": "MIT", "dependencies": { - "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.46.2", - "@typescript-eslint/types": "8.46.2", - "@typescript-eslint/typescript-estree": "8.46.2" + "@eslint-community/eslint-utils": "^4.9.1", + "@typescript-eslint/scope-manager": "8.56.0", + "@typescript-eslint/types": "8.56.0", + "@typescript-eslint/typescript-estree": "8.56.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -624,19 +583,19 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.46.2", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.46.2.tgz", - "integrity": "sha512-tUFMXI4gxzzMXt4xpGJEsBsTox0XbNQ1y94EwlD/CuZwFcQP79xfQqMhau9HsRc/J0cAPA/HZt1dZPtGn9V/7w==", + "version": "8.56.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.56.0.tgz", + "integrity": "sha512-q+SL+b+05Ud6LbEE35qe4A99P+htKTKVbyiNEe45eCbJFyh/HVK9QXwlrbz+Q4L8SOW4roxSVwXYj4DMBT7Ieg==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.46.2", - "eslint-visitor-keys": "^4.2.1" + "@typescript-eslint/types": "8.56.0", + "eslint-visitor-keys": "^5.0.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -647,13 +606,13 @@ } }, "node_modules/@typescript-eslint/visitor-keys/node_modules/eslint-visitor-keys": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", - "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-5.0.1.tgz", + "integrity": "sha512-tD40eHxA35h0PEIZNeIjkHoDR4YjjJp34biM0mDvplBe//mB+IHCqHDGV7pxF+7MklTvighcCPPZC7ynWyjdTA==", "dev": true, "license": "Apache-2.0", "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + "node": "^20.19.0 || ^22.13.0 || >=24" }, "funding": { "url": "https://opencollective.com/eslint" @@ -701,12 +660,11 @@ } }, "node_modules/acorn": { - "version": "8.15.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", - "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", + "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1181,14 +1139,14 @@ "license": "MIT" }, "node_modules/enhanced-resolve": { - "version": "5.18.3", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.3.tgz", - "integrity": 
"sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==", + "version": "5.19.0", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.19.0.tgz", + "integrity": "sha512-phv3E1Xl4tQOShqSte26C7Fl84EwUdZsyOuSSk9qtAGyyQs2s3jJzComh+Abf4g187lUUAvH+H26omrqia2aGg==", "dev": true, "license": "MIT", "dependencies": { "graceful-fs": "^4.2.4", - "tapable": "^2.2.0" + "tapable": "^2.3.0" }, "engines": { "node": ">=10.13.0" @@ -1218,21 +1176,20 @@ } }, "node_modules/eslint": { - "version": "9.38.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.38.0.tgz", - "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", + "version": "9.39.3", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.3.tgz", + "integrity": "sha512-VmQ+sifHUbI/IcSopBCF/HO3YiHQx/AVd3UVyYL6weuwW+HvON9VYn5l6Zl1WZzPWXPNZrSQpxwkkZ/VuvJZzg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", "@eslint/config-array": "^0.21.1", - "@eslint/config-helpers": "^0.4.1", - "@eslint/core": "^0.16.0", + "@eslint/config-helpers": "^0.4.2", + "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.38.0", - "@eslint/plugin-kit": "^0.4.0", + "@eslint/js": "9.39.3", + "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.4.2", @@ -1400,9 +1357,9 @@ } }, "node_modules/esquery": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz", - "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==", + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.7.0.tgz", + "integrity": "sha512-Ap6G0WQwcU/LHsvLwON1fAQX9Zp0A2Y6Y/cJBl9r/JbW90Zyg4/zbG6zzKa2OTALELarYHmKu0GhpM5EO+7T0g==", 
"dev": true, "license": "BSD-3-Clause", "dependencies": { @@ -1452,23 +1409,6 @@ "dev": true, "license": "MIT" }, - "node_modules/fast-glob": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", - "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.8" - }, - "engines": { - "node": ">=8.6.0" - } - }, "node_modules/fast-json-stable-stringify": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", @@ -1483,16 +1423,6 @@ "dev": true, "license": "MIT" }, - "node_modules/fastq": { - "version": "1.19.1", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", - "integrity": "sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "reusify": "^1.0.4" - } - }, "node_modules/file-entry-cache": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", @@ -1617,9 +1547,9 @@ } }, "node_modules/get-east-asian-width": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.4.0.tgz", - "integrity": "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==", + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.5.0.tgz", + "integrity": "sha512-CQ+bEO+Tva/qlmw24dCejulK5pMzVnUOFOijVogd3KQs07HnRIgp8TGipvCCRT06xeYEbpbgwaCxglFyiuIcmA==", "dev": true, "license": "MIT", "engines": { @@ -1630,9 +1560,10 @@ } }, "node_modules/glob": { - "version": "10.4.5", - "resolved": 
"https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", + "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", "dev": true, "license": "ISC", "dependencies": { @@ -1683,13 +1614,6 @@ "dev": true, "license": "ISC" }, - "node_modules/graphemer": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", - "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==", - "dev": true, - "license": "MIT" - }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -1993,9 +1917,9 @@ } }, "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", "dev": true, "license": "MIT", "dependencies": { @@ -2136,30 +2060,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true, - "license": 
"MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/micromatch": { - "version": "4.0.8", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", - "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", - "dev": true, - "license": "MIT", - "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" - }, - "engines": { - "node": ">=8.6" - } - }, "node_modules/mimic-function": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/mimic-function/-/mimic-function-5.0.1.tgz", @@ -2190,19 +2090,19 @@ } }, "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz", + "integrity": "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==", "dev": true, - "license": "ISC", + "license": "BlueOak-1.0.0", "engines": { "node": ">=16 || 14 >=14.17" } }, "node_modules/mocha": { - "version": "11.7.4", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-11.7.4.tgz", - "integrity": "sha512-1jYAaY8x0kAZ0XszLWu14pzsf4KV740Gld4HXkhNTXwcHx4AUEDkPzgEHg9CM5dVcW+zv036tjpsEbLraPJj4w==", + "version": "11.7.5", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-11.7.5.tgz", + "integrity": "sha512-mTT6RgopEYABzXWFx+GcJ+ZQ32kp4fMf0xvpZIIfSq9Z8lC/++MtcCnQ9t5FP2veYEP95FIYSvW+U9fV4xrlig==", "dev": true, "license": "MIT", "dependencies": { @@ -2608,27 +2508,6 @@ "node": ">=6" } }, - "node_modules/queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "dev": true, - "funding": [ - { - "type": 
"github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, "node_modules/randombytes": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", @@ -2705,41 +2584,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/reusify": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", - "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", - "dev": true, - "license": "MIT", - "engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" - } - }, - "node_modules/run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "queue-microtask": "^1.2.2" - } - }, "node_modules/safe-buffer": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", @@ -2748,9 +2592,9 @@ "license": "MIT" }, "node_modules/semver": { - "version": "7.7.3", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", - "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "dev": true, "license": "ISC", 
"bin": { @@ -3010,7 +2854,7 @@ "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "deprecated": "Glob versions prior to v9 are no longer supported", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", "dev": true, "license": "ISC", "dependencies": { @@ -3041,6 +2885,54 @@ "node": "*" } }, + "node_modules/tinyglobby": { + "version": "0.2.15", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", + "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tinyglobby/node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/tinyglobby/node_modules/picomatch": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", + "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, 
"node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ -3055,9 +2947,9 @@ } }, "node_modules/ts-api-utils": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.1.0.tgz", - "integrity": "sha512-CUgTZL1irw8u29bzrOD/nH85jqyc74D6SshFgujOIA7osm2Rz7dYH77agkx7H4FBNxDq7Cjf+IjaX/8zwFW+ZQ==", + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz", + "integrity": "sha512-3TaVTaAv2gTiMB35i3FiGJaRfwb3Pyn/j3m/bfAvGe8FB7CF6u+LMYqYlDh7reQf7UNvoTvdfAqHGmPGOSsPmA==", "dev": true, "license": "MIT", "engines": { @@ -3086,7 +2978,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/masfactory-visualizer/webview-ui/package-lock.json b/masfactory-visualizer/webview-ui/package-lock.json index 760581c..b01a4ad 100644 --- a/masfactory-visualizer/webview-ui/package-lock.json +++ b/masfactory-visualizer/webview-ui/package-lock.json @@ -1008,7 +1008,6 @@ "resolved": "https://registry.npmjs.org/cytoscape/-/cytoscape-3.33.1.tgz", "integrity": "sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10" } @@ -1334,7 +1333,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -1349,7 +1347,6 @@ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", @@ -1416,7 +1413,6 @@ "resolved": 
"https://registry.npmjs.org/vue/-/vue-3.5.26.tgz", "integrity": "sha512-SJ/NTccVyAoNUJmkM9KUqPcYlY+u8OVL1X5EW9RIs3ch5H2uERxyyIUI4MRxVCSOiEcupX9xNGde1tL9ZKpimA==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.26", "@vue/compiler-sfc": "3.5.26", diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..2972e9e --- /dev/null +++ b/package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "MASFactory", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} From 234d93662f4d57a5cdc636b57cb50fd785e5e706 Mon Sep 17 00:00:00 2001 From: ffffiuling <17692510923@163.com> Date: Wed, 11 Mar 2026 14:39:29 +0800 Subject: [PATCH 2/2] lats/programming --- applications/lats/README.md | 107 +++++++------- applications/lats/components/agents.py | 161 ++++---------------- applications/lats/components/formatters.py | 36 ----- applications/lats/workflows/graph.py | 162 ++++++++++++++------- 4 files changed, 191 insertions(+), 275 deletions(-) delete mode 100644 applications/lats/components/formatters.py diff --git a/applications/lats/README.md b/applications/lats/README.md index 50767d9..613ae9a 100644 --- a/applications/lats/README.md +++ b/applications/lats/README.md @@ -8,100 +8,94 @@ This directory is a [MASFactory](https://github.com/BUPT-GAMMA/MASFactory) appli ## Layout ``` -lats/ +applications/lats/ ├── main.py # Entry: argparse, load dataset, build graph, run loop, tee to log ├── README.md ├── assets/ │ └── config/ # Config (default dataset path, etc.); datasets not in repo │ └── defaults.json ├── workflows/ # Graph and controller -│ ├── graph.py # Build RootGraph, LATSTemplate, run_one_problem -│ └── controller.py # lats_controller_logic (MCTS select / expand / backprop / terminate) -├── components/ # Custom MASFactory components -│ ├── formatters.py # ContentMessageFormatter, passthrough dicts -│ ├── agents.py # LATSBaseAgent, LATSLLMAgent, ReflectionAgent, HumanEval executor +│ ├── graph.py # RootGraph, LATSTemplate 
(Loop with LLM_Agent, Executor, Reflection), run_one_problem +│ └── controller.py # lats_controller_logic (MCTS select / expand / backprop / terminate) +├── components/ +│ ├── agents.py # run_humaneval_forward, set_print_code_attempts (no custom Agent/CustomNode classes) │ └── tree.py # LATSNode, TreeManager, gather_context_from_tree ├── humaneval/ # HumanEval data and execution │ ├── load.py # load_humaneval_jsonl, parse_internal_tests_from_test, extract_python_code │ ├── executor.py # run_internal_tests, full_evaluate, verify_evaluation -│ └── timeout_utils.py # function_with_timeout +│ └── timeout_utils.py # function_with_timeout └── utils/ └── tee.py # Tee output to terminal and optional log file ``` -## Context and memory in this port - -In the LATS paper and some references, **context** and **memory** appear as conceptual (or explicit) elements. In this MASFactory application we do **not** add separate **Context** or **Memory** nodes. They are implemented as follows. - -### Context - -**Role:** Provide the LLM with the accumulated trajectory (previous code attempts, test results, and reflections) so it can produce the next, improved attempt. - -**Implementation:** Context is built **inside the controller** and passed to the LLM via the existing message flow: +## Design -1. After each **Reflection** step, the controller selects the next node (MCTS selection) and gets the path from that node back to the root. -2. `**gather_context_from_tree(selected)`** in `components/tree.py` collects along that path: - - previous **solutions** (code), - - **test_feedback** (unit test results), - - **reflections** (short explanations of failure). -3. The controller assembles these into a single string `**reflexion_prompt`** (with blocks like `[previous impl 1]`, `[unit test results 1]`, `[reflection 1]`, etc.). -4. `**reflexion_prompt**` is passed to **LLM_Agent** as the prompt for the next iteration. 
+- **No custom Agent or CustomNode subclasses.** The graph uses only MASFactory’s built-in **Agent** and **CustomNode** via **NodeTemplate**: + - **LLM_Agent**: `NodeTemplate(Agent, model=..., instructions=..., formatters=[ParagraphMessageFormatter(), TwinsFieldTextFormatter()])` + - **Executor**: `NodeTemplate(CustomNode, forward=run_humaneval_forward, pull_keys=..., push_keys=...)` + - **Reflection**: `NodeTemplate(Agent, model=..., instructions=..., formatters=[...])` +- **Messaging** is done via **edges** and Loop **pull_keys** (no Agent/MessageFormatter overloading for pass-through). Controller sends `reflexion_prompt` to LLM; controller sends `problem` and `internal_tests` to Executor; LLM sends `content` to Executor; Executor sends `action`/`observation` to Reflection and `action`/`observation`/`reward`/`full_passed` to controller; Reflection sends only `reflection` to controller. +- **Executor** and **Reflection** are separate nodes (executor logic in `run_humaneval_forward`; reflection is a plain Agent). NodeTemplates are wrapped in **Shared(...)** so the Loop config can be cloned without deepcopying them. -So “context” is **inlined into the prompt**: it is computed in `workflows/controller.py` and carried in the message key `reflexion_prompt` to the LLM node, without a dedicated Context node. - -### Memory - -**Role:** Persist the search tree (all tried solutions, feedback, rewards, and structure) across loop iterations. - -**Implementation:** Memory is the **search tree** maintained by the controller: +## Context and memory in this port -1. `**LATSNode`** (in `components/tree.py`) stores per-node state: `solution`, `test_feedback`, `reflection`, `value`, `visits`, `parent`, `children`. -2. `**TreeManager**` holds the `root`, `current_node`, and `_max_iters`, and implements **selection** (UCT), **backprop** (reward update), and tree growth (adding children when the Executor returns a new attempt). -3. 
The controller **reads and updates** this tree each loop: it appends new children, runs backprop, and uses `gather_context_from_tree` to build the next context. +In the LATS paper, **context** and **memory** are conceptual. In this app they are not separate nodes: -So “memory” is the **tree state** (nodes + manager) owned and updated by the controller logic; there is no separate Memory agent or node. The graph nodes you see in MASFactory are only: **LLM_Agent**, **Executor**, **Reflection**, and the **controller** (Loop’s terminate function). Context and memory are implemented **inside** the controller and the shared tree, not as extra nodes. +- **Context**: Built inside the controller and sent to the LLM as `reflexion_prompt` (previous attempts, test results, reflections). See `workflows/controller.py` and `gather_context_from_tree` in `components/tree.py`. +- **Memory**: The search tree (`LATSNode`, `TreeManager` in `components/tree.py`) held by the controller; it is updated each loop (selection, backprop, new children). ## Setup -From the repo root (parent of `lats/`): +From the MASFactory repo root: ```bash -# Install MASFactory and dependencies (openai, etc.) pip install masfactory openai - -# Optional: set default dataset in assets/config/defaults.json -# "dataset_path": "path/to/HumanEval.jsonl.gz" ``` +Optional: set default dataset in `assets/config/defaults.json` (`"dataset_path": "path/to/HumanEval.jsonl.gz"`). 
+ Environment variables: -- **OPENAI_API_KEY** (required) -- **OPENAI_API_BASE** (optional, for proxy/custom endpoint) -- **LATS_MODEL** (optional, default `gpt-4`) -- **LATS_MAX_ITERS** (optional, default `8`) -- **NUMBER_OF_TESTS** (optional, default `2`) -- **MASFACTORY_VISUALIZER_PORT** (optional, for runtime view) +| Variable | Description | +|----------|-------------| +| **OPENAI_API_KEY** | Required | +| **OPENAI_API_BASE** | Optional (proxy/custom endpoint) | +| **LATS_MODEL** | Optional, default `gpt-4` | +| **LATS_MAX_ITERS** | Optional, default `8` | +| **NUMBER_OF_TESTS** | Optional, default `2` | +| **MASFACTORY_VISUALIZER_PORT** | Optional, for runtime view | + +Example (PowerShell): + +```powershell +$env:OPENAI_API_KEY="your-key" +$env:OPENAI_API_BASE="https://your-endpoint/v1/" +$env:LATS_MODEL="你的模型" +... +``` ## Run -From the repo root (e.g. `D:\PE`): +From the **MASFactory repo root**: ```bash -# Default dataset path may be read from assets/config/defaults.json -python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --log logs/lats.log +python applications/lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --log logs/lats.log ``` -Examples: +Or from **applications/lats**: ```bash -# Limit to 5 problems, write same output to log file -python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --limit 5 --log logs/lats.log +python main.py --dataset "path/to/HumanEval.jsonl.gz" --log logs/lats.log +``` -# Print every attempt (not only final solution) -python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --print-code --log logs/lats.log +Examples: + +```bash +# Limit to 5 problems +python applications/lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --limit 5 --log logs/lats.log -# Paper-aligned defaults: max_iters=8, number_of_tests=2 (no need to pass if using env or defaults) -python lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --log logs/lats.log +# Paper-aligned: max_iters=8, number_of_tests=2 (defaults) +python 
applications/lats/main.py --dataset "path/to/HumanEval.jsonl.gz" --log logs/lats.log ``` Output is printed to the terminal and, when `--log` is set, appended to the given file. @@ -109,5 +103,4 @@ Output is printed to the terminal and, when `--log` is set, appended to the give ## Metrics - **Pass@1**: fraction of problems for which the best solution passes the full HumanEval test. -- Defaults align with the upstream GPT-4 run script: `max_iters=8`, `number_of_tests=2`. - +- Defaults match the upstream GPT-4 setup: `max_iters=8`, `number_of_tests=2`. diff --git a/applications/lats/components/agents.py b/applications/lats/components/agents.py index 474aa49..f2ea007 100644 --- a/applications/lats/components/agents.py +++ b/applications/lats/components/agents.py @@ -1,40 +1,26 @@ """ -LATS agents: base, LLM, Reflection (and Executor implemented as ReflectionAgent with role=executor). +Helper utilities for the LATS HumanEval executor. + +This module provides only: +- set_print_code_attempts (for --print-code) +- run_humaneval_forward (used by CustomNode in the workflow) + +No custom Agent or CustomNode subclasses; the graph uses system Agent and CustomNode via NodeTemplate. """ -import os -from masfactory import Agent, OpenAIModel -from masfactory.core.message import ParagraphMessageFormatter -from . 
import formatters as fmt -from .tree import LATSNode from ..humaneval.load import extract_python_code, parse_internal_tests_from_test from ..humaneval.executor import run_internal_tests, full_evaluate from ..utils.tee import tee, get_log_file -# Model instance (injected or from env) -model_instance = OpenAIModel( - api_key=os.environ.get("OPENAI_API_KEY", ""), - base_url=os.environ.get("OPENAI_API_BASE", ""), - model_name=os.environ.get("LATS_MODEL", "gpt-4"), -) - -# When True, print each attempt body to terminal and log (--print-code) _print_code_attempts = False -def set_print_code_attempts(value: bool): +def set_print_code_attempts(value: bool) -> None: + """Enable or disable printing each generated code attempt.""" global _print_code_attempts _print_code_attempts = value -_ENV_PUSH_KEYS = { - "observation": "observation", - "reward": "reward", - "action": "action", - "full_passed": "full_passed", -} - - def _print_generated_func_body(func_body: str, problem_name: str = "") -> None: """Print generated code to terminal and log (if --log and --print-code).""" if not _print_code_attempts: @@ -47,12 +33,29 @@ def _print_generated_func_body(func_body: str, problem_name: str = "") -> None: tee("------------------------------------------\n", get_log_file()) -def _run_humaneval_forward(input_dict: dict) -> dict: - """HumanEval execution (originally HumanEvalEnvironment._forward). Used by ReflectionAgent with role=executor.""" - content = input_dict.get("action", "") or input_dict.get("content", "") +def run_humaneval_forward(input_dict: dict, attrs: dict | None = None) -> dict: + """HumanEval execution (originally HumanEvalEnvironment._forward). + + Accept both edge-passed fields (message) and attribute-based fields (attrs), + and prefer explicit message values when present. 
+ """ + attrs = attrs or {} + content = ( + input_dict.get("action", "") + or input_dict.get("content", "") + or (attrs.get("content", "") if isinstance(attrs, dict) else "") + ) raw = str(content).strip() - problem = input_dict.get("problem") or {} - internal_tests = input_dict.get("internal_tests") or [] + + problem = input_dict.get("problem") + if not isinstance(problem, dict): + candidate = attrs.get("problem") if isinstance(attrs, dict) else None + problem = candidate if isinstance(candidate, dict) else {} + + internal_tests = input_dict.get("internal_tests") + if not isinstance(internal_tests, list): + candidate_tests = attrs.get("internal_tests") if isinstance(attrs, dict) else None + internal_tests = candidate_tests if isinstance(candidate_tests, list) else [] entry_point = problem.get("entry_point", "") test = problem.get("test", "") prompt = problem.get("prompt", "") @@ -97,105 +100,3 @@ def _run_humaneval_forward(input_dict: dict) -> dict: "problem": problem, "internal_tests": internal_tests, } - - -class LATSBaseAgent(Agent): - """Base agent: config merged into kwargs; role can be used by subclasses (e.g. 
ReflectionAgent as executor).""" - - def __init__(self, name, *args, **kwargs): - if args and isinstance(args[0], dict): - kwargs = {**args[0], **kwargs} - args = () - self._role = kwargs.pop("role", None) - kwargs.setdefault("model", model_instance) - super().__init__(name, *args, **kwargs) - - -class LATSLLMAgent(LATSBaseAgent): - """Pass-through problem/internal_tests; formatter merges _lats_llm_passthrough to satisfy output_keys.""" - - def step(self, input_dict: dict) -> dict: - fmt._lats_llm_passthrough = { - "problem": input_dict.get("problem"), - "internal_tests": input_dict.get("internal_tests"), - } - return super().step(input_dict) - - def _forward(self, input_dict: dict) -> dict: - out = super()._forward(input_dict) - out["problem"] = input_dict.get("problem") - out["internal_tests"] = input_dict.get("internal_tests") - if "content" not in out or not str(out.get("content", "")).strip(): - out["content"] = ( - out.get("content") - or out.get("action") - or out.get("response") - or out.get("text") - or str(out) - ) - return out - - -class ReflectionAgent(LATSBaseAgent): - """Reflection node. 
When config role=executor, same class acts as Executor (HumanEval runner) for visualizer.""" - - def __init__(self, name, *args, **kwargs): - super().__init__(name, *args, **kwargs) - if getattr(self, "_role", None) == "executor": - self._push_keys = dict(_ENV_PUSH_KEYS) - - @property - def push_keys(self): - if getattr(self, "_role", None) == "executor": - return dict(_ENV_PUSH_KEYS) - return super().push_keys - - def step(self, input_dict: dict) -> dict: - if getattr(self, "_role", None) != "executor": - fmt._lats_reflection_passthrough = { - k: input_dict.get(k) - for k in ( - "action", - "observation", - "reward", - "full_passed", - "problem", - "internal_tests", - ) - if k in input_dict - } - return super().step(input_dict) - - def _forward(self, input_dict: dict) -> dict: - if getattr(self, "_role", None) == "executor": - ctx = None - result = {} - try: - from masfactory.visualizer import get_bridge - bridge = get_bridge() if get_bridge else None - if bridge is not None: - ctx = bridge.node_start(self, input_dict) - except Exception: - pass - try: - result = _run_humaneval_forward(input_dict) - finally: - if ctx is not None: - try: - from masfactory.visualizer import get_bridge as _gb - b = _gb() if _gb else None - if b is not None: - b.node_end(ctx, result, node=self) - except Exception: - pass - return result - out = super()._forward(input_dict) - ref = (out.get("content") or out.get("action") or str(out)).strip() - out = {**out, "reflection": ref} - out["problem"] = input_dict.get("problem") - out["internal_tests"] = input_dict.get("internal_tests") - out["action"] = input_dict.get("action") - out["observation"] = input_dict.get("observation") - out["reward"] = input_dict.get("reward") - out["full_passed"] = input_dict.get("full_passed", False) - return out diff --git a/applications/lats/components/formatters.py b/applications/lats/components/formatters.py deleted file mode 100644 index df4216f..0000000 --- a/applications/lats/components/formatters.py +++ 
/dev/null @@ -1,36 +0,0 @@ -""" -Plain-text output formatter for LATS LLM (code/natural language, not JSON). -Merge keys from a module-level dict to satisfy output_keys. -""" -from masfactory.core.message import MessageFormatter - -# Filled by agents before step(); formatter merges these to satisfy output_keys -_lats_llm_passthrough = {} -_lats_reflection_passthrough = {} - - -class ContentMessageFormatter(MessageFormatter): - """Expose model raw output as a single key. merge_global names the module-level dict to merge for output_keys.""" - - def __init__(self, output_key: str = "content", merge_global: str = ""): - super().__init__() - self._output_key = output_key - self._merge_global = merge_global - self._is_input_formatter = True - self._is_output_formatter = True - self._agent_introducer = ( - f"Your response will be used as the value for the key '{output_key}'. " - "Provide your response as plain text only (e.g. Python code or a short explanation). Do not wrap in JSON." - ) - - def format(self, message: str) -> dict: - raw = (message.strip() if isinstance(message, str) and message else "") or "" - out = {self._output_key: raw} - if self._merge_global: - passthrough = globals().get(self._merge_global, {}) - if isinstance(passthrough, dict) and passthrough: - out.update(passthrough) - return out - - def dump(self, message: dict) -> str: - return str(message.get(self._output_key, "")) diff --git a/applications/lats/workflows/graph.py b/applications/lats/workflows/graph.py index 00eec90..3db503d 100644 --- a/applications/lats/workflows/graph.py +++ b/applications/lats/workflows/graph.py @@ -1,61 +1,88 @@ """ LATS workflow: build RootGraph + Loop (LLM -> Executor -> Reflection -> controller), run_one_problem. +Uses only system Agent and CustomNode via NodeTemplate (no custom agent classes). 
""" import os from typing import List, Tuple -from masfactory import RootGraph, NodeTemplate, Loop -from masfactory.core.message import ParagraphMessageFormatter +from masfactory import RootGraph, NodeTemplate, Loop, Agent, CustomNode, Shared +from masfactory.core.message import ParagraphMessageFormatter, TwinsFieldTextFormatter +from masfactory import OpenAIModel -from ..components.formatters import ContentMessageFormatter -from ..components.agents import ( - LATSLLMAgent, - ReflectionAgent, - set_print_code_attempts, -) +from ..components.agents import run_humaneval_forward from ..components.tree import LATSNode, TreeManager from .controller import lats_controller_logic, set_lats_tree -from ..humaneval.executor import full_evaluate, verify_evaluation +from ..humaneval.executor import full_evaluate # Paper/source run_lats_gpt4.sh: max_iters=8, number_of_tests=2 LATS_MAX_ITERS = int(os.environ.get("LATS_MAX_ITERS", "8")) NUMBER_OF_TESTS = int(os.environ.get("NUMBER_OF_TESTS", "2")) -loop_nodes = [ - ( - "LLM_Agent", - LATSLLMAgent, - { - "instructions": "You output ONLY Python code in a ```python ... ``` block. No explanations. Restate the function signature in your implementation.", - "prompt_template": "{reflexion_prompt}", - "formatters": [ - ParagraphMessageFormatter(), - ContentMessageFormatter("content", merge_global="_lats_llm_passthrough"), - ], - }, +EXECUTOR_PUSH_KEYS = { + "observation": "observation", + "reward": "reward", + "action": "action", + "full_passed": "full_passed", +} + +model_instance = OpenAIModel( + api_key=os.environ.get("OPENAI_API_KEY", ""), + base_url=os.environ.get("OPENAI_API_BASE", ""), + model_name=os.environ.get("LATS_MODEL", "gpt-4"), +) + +LLMAgentTemplate = NodeTemplate( + Agent, + model=model_instance, + instructions=( + "You output ONLY Python code in a ```python ... ``` block. " + "No explanations. Restate the function signature in your implementation." 
), - ( - "Executor", - ReflectionAgent, - { - "role": "executor", - "instructions": "Run HumanEval internal tests and full evaluation.", - "pull_keys": {"problem": "problem", "internal_tests": "internal_tests", "content": "content"}, - "push_keys": {"observation": "observation", "reward": "reward", "action": "action", "full_passed": "full_passed"}, - }, + prompt_template="{reflexion_prompt}", + formatters=[ + ParagraphMessageFormatter(), + TwinsFieldTextFormatter(), + ], +) + +ExecutorTemplate = NodeTemplate( + CustomNode, + forward=run_humaneval_forward, + pull_keys={ + "problem": "problem", + "internal_tests": "internal_tests", + "content": "content", + }, + push_keys=dict(EXECUTOR_PUSH_KEYS), +) + +ReflectionTemplate = NodeTemplate( + Agent, + model=model_instance, + instructions=( + "You are a Python programming assistant. Given a function implementation " + "and unit test results, write a few sentences explaining why the " + "implementation is wrong. Do NOT output code, only the explanation." ), - ( - "Reflection", - ReflectionAgent, - { - "instructions": "You are a Python programming assistant. Given a function implementation and unit test results, write a few sentences explaining why the implementation is wrong. 
Do NOT output code, only the explanation.", - "prompt_template": "[function impl]:\n```python\n{action}\n```\n\n[unit test results]:\n{observation}\n\n[self-reflection]:", - "formatters": [ - ParagraphMessageFormatter(), - ContentMessageFormatter("reflection", merge_global="_lats_reflection_passthrough"), - ], - }, + prompt_template=( + "[function impl]:\n```python\n{action}\n```\n\n" + "[unit test results]:\n{observation}\n\n[self-reflection]:" ), + pull_keys={ + "action": "Candidate implementation under review.", + "observation": "Unit-test feedback for the candidate implementation.", + }, + formatters=[ + ParagraphMessageFormatter(), + TwinsFieldTextFormatter(), + ], +) + +# Shared() prevents NodeTemplate from being deepcopied when the Loop config is cloned (avoids RLock/pickle errors). +loop_nodes = [ + ("LLM_Agent", Shared(LLMAgentTemplate)), + ("Executor", Shared(ExecutorTemplate)), + ("Reflection", Shared(ReflectionTemplate)), ] LATSTemplate = NodeTemplate( @@ -64,10 +91,47 @@ terminate_condition_function=lats_controller_logic, nodes=loop_nodes, edges=[ - ("controller", "LLM_Agent", {"reflexion_prompt": "reflexion_prompt", "problem": "problem", "internal_tests": "internal_tests"}), - ("LLM_Agent", "Executor", {"content": "content", "problem": "problem", "internal_tests": "internal_tests"}), - ("Executor", "Reflection", {"action": "action", "observation": "observation", "reward": "reward", "full_passed": "full_passed", "problem": "problem", "internal_tests": "internal_tests"}), - ("Reflection", "controller", {"action": "action", "observation": "observation", "reward": "reward", "full_passed": "full_passed", "reflection": "reflection", "problem": "problem", "internal_tests": "internal_tests"}), + # Controller provides the next prompt; problem/internal_tests in loop attributes. 
+ ( + "controller", + "LLM_Agent", + {"reflexion_prompt": "reflexion_prompt"}, + ), + # Controller also sends problem/internal_tests to Executor (so Executor gets them without LLM pass-through). + ( + "controller", + "Executor", + {"problem": "problem", "internal_tests": "internal_tests"}, + ), + # LLM_Agent outputs candidate code only. + ( + "LLM_Agent", + "Executor", + {"content": "content"}, + ), + # Executor sends implementation and test output to Reflection. + ( + "Executor", + "Reflection", + {"action": "action", "observation": "observation"}, + ), + # Executor sends evaluation state to controller. + ( + "Executor", + "controller", + { + "action": "action", + "observation": "observation", + "reward": "reward", + "full_passed": "full_passed", + }, + ), + # Reflection sends only the explanation to controller. + ( + "Reflection", + "controller", + {"reflection": "reflection"}, + ), ], pull_keys={"problem": "problem", "internal_tests": "internal_tests"}, push_keys={"final_code": "final_code", "final_passed": "final_passed"}, @@ -85,18 +149,12 @@ def build_graph() -> RootGraph: ], ) g.build() - # Wire Executor push_keys for visualizer try: lats_loop = getattr(g, "_nodes", {}).get("LATS") if lats_loop is not None and hasattr(lats_loop, "_nodes"): env_node = lats_loop._nodes.get("Executor") if env_node is not None and hasattr(env_node, "set_push_keys"): - env_node.set_push_keys({ - "observation": "observation", - "reward": "reward", - "action": "action", - "full_passed": "full_passed", - }) + env_node.set_push_keys(dict(EXECUTOR_PUSH_KEYS)) except Exception: pass return g