From 7de2bdbbb82ccdafaefff3d46d6b09fa54730952 Mon Sep 17 00:00:00 2001 From: Wesley Simplicio Date: Sat, 30 May 2026 02:11:22 -0300 Subject: [PATCH 1/3] feat: add scratch codegen executor registry --- simplicio/scratch/codegen/__init__.py | 12 +++ simplicio/scratch/codegen/registry.py | 32 ++++++++ simplicio/scratch/codegen/types.py | 30 ++++++++ simplicio/scratch/executor.py | 32 +++++++- tests/python/test_scratch_codegen.py | 104 ++++++++++++++++++++++++++ 5 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 simplicio/scratch/codegen/__init__.py create mode 100644 simplicio/scratch/codegen/registry.py create mode 100644 simplicio/scratch/codegen/types.py create mode 100644 tests/python/test_scratch_codegen.py diff --git a/simplicio/scratch/codegen/__init__.py b/simplicio/scratch/codegen/__init__.py new file mode 100644 index 0000000..c55e134 --- /dev/null +++ b/simplicio/scratch/codegen/__init__.py @@ -0,0 +1,12 @@ +"""Deterministic code-generation executors for scratch tasks.""" + +from .registry import register_executor, registered_executors, try_execute +from .types import CodegenResult, TaskExecutor + +__all__ = [ + "CodegenResult", + "TaskExecutor", + "register_executor", + "registered_executors", + "try_execute", +] diff --git a/simplicio/scratch/codegen/registry.py b/simplicio/scratch/codegen/registry.py new file mode 100644 index 0000000..a6e5296 --- /dev/null +++ b/simplicio/scratch/codegen/registry.py @@ -0,0 +1,32 @@ +"""Registry for deterministic scratch task executors.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Iterable + +from ..plan_schema import Task +from ..stack_registry import Stack +from .types import CodegenResult, TaskExecutor + +_DEFAULT_EXECUTORS: list[TaskExecutor] = [] + + +def registered_executors() -> list[TaskExecutor]: + return list(_DEFAULT_EXECUTORS) + + +def register_executor(executor: TaskExecutor) -> None: + _DEFAULT_EXECUTORS.append(executor) + + +def try_execute( + 
task: Task, + project_dir: Path, + stack: Stack, + executors: Iterable[TaskExecutor] | None = None, +) -> CodegenResult | None: + for executor in executors if executors is not None else _DEFAULT_EXECUTORS: + if executor.can_handle(task, stack): + return executor.execute(task, project_dir, stack) + return None diff --git a/simplicio/scratch/codegen/types.py b/simplicio/scratch/codegen/types.py new file mode 100644 index 0000000..ae6003b --- /dev/null +++ b/simplicio/scratch/codegen/types.py @@ -0,0 +1,30 @@ +"""Shared contracts for deterministic scratch task executors.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from pathlib import Path + +from ..plan_schema import Task +from ..stack_registry import Stack + + +@dataclass +class CodegenResult: + passed: bool + files_modified: list[Path] = field(default_factory=list) + log: str = "" + fallback_to_llm: bool = False + + +class TaskExecutor(ABC): + name: str + + @abstractmethod + def can_handle(self, task: Task, stack: Stack) -> bool: + """Return True when this executor can handle the task mechanically.""" + + @abstractmethod + def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult: + """Apply the deterministic task edit and return the execution result.""" diff --git a/simplicio/scratch/executor.py b/simplicio/scratch/executor.py index 0265dc7..0dbf764 100644 --- a/simplicio/scratch/executor.py +++ b/simplicio/scratch/executor.py @@ -16,13 +16,13 @@ import json import os -import shutil import subprocess import time from dataclasses import dataclass, field from pathlib import Path from typing import Optional +from .codegen import CodegenResult, try_execute from .plan_schema import Plan, Task from .stack_registry import Stack @@ -125,14 +125,25 @@ def _execute_one_task(task: Task, project_dir: Path, can still be smoke-tested. 
When SIMPLICIO_MODEL IS set, defers to simplicio.pipeline via the adapter.""" t0 = time.perf_counter() + codegen_log = "" + + codegen_result = try_execute(task, project_dir, stack) + if codegen_result is not None: + codegen_log = codegen_result.log + if codegen_result.passed or not codegen_result.fallback_to_llm: + return _task_result_from_codegen(task, t0, codegen_result) if not os.environ.get("SIMPLICIO_MODEL"): # smoke-test mode: log the task but mark as skipped (no LLM call made) ms = int((time.perf_counter() - t0) * 1000) + fallback_note = ( + f"codegen fallback: {codegen_log[:200]}\n" + if codegen_log else "" + ) return TaskResult( id=task.id, target=task.target, passed=False, duration_ms=ms, skipped_reason="no SIMPLICIO_MODEL set; task generation skipped", - log_tail=f"goal={task.goal[:200]}", + log_tail=f"{fallback_note}goal={task.goal[:200]}", ) try: @@ -145,11 +156,28 @@ def _execute_one_task(task: Task, project_dir: Path, ) passed, log = run_task(task, project_dir, stack) + if codegen_log: + log = f"codegen fallback: {codegen_log}\n\n{log}" ms = int((time.perf_counter() - t0) * 1000) return TaskResult(id=task.id, target=task.target, passed=passed, duration_ms=ms, log_tail=log) +def _task_result_from_codegen( + task: Task, started_at: float, result: CodegenResult +) -> TaskResult: + ms = int((time.perf_counter() - started_at) * 1000) + files = ", ".join(str(path) for path in result.files_modified) + suffix = f"\nfiles_modified={files}" if files else "" + return TaskResult( + id=task.id, + target=task.target, + passed=result.passed, + duration_ms=ms, + log_tail=f"{result.log}{suffix}".strip(), + ) + + def execute_plan(plan: Plan, stack: Stack, parent_dir: Path, skip_install: bool = False) -> ExecutorReport: """Materialize the plan into parent_dir//.""" diff --git a/tests/python/test_scratch_codegen.py b/tests/python/test_scratch_codegen.py new file mode 100644 index 0000000..6815c06 --- /dev/null +++ b/tests/python/test_scratch_codegen.py @@ -0,0 +1,104 @@ 
+"""Tests for deterministic scratch codegen executor plumbing.""" + +from __future__ import annotations + +from pathlib import Path + +from simplicio.scratch.codegen import CodegenResult, TaskExecutor +from simplicio.scratch.codegen import registry as codegen_registry +from simplicio.scratch.executor import _execute_one_task +from simplicio.scratch.plan_schema import Task +from simplicio.scratch.stack_registry import Stack + + +def _task() -> Task: + return Task( + id="T01-codegen", + goal="add deterministic file", + target="src/app.py", + criteria="file exists", + constraints="no llm", + verify="pytest -q", + ) + + +def _stack(tmp_path: Path) -> Stack: + return Stack( + slug="py-fastapi", + path=tmp_path, + meta={"language": "Python", "framework": "FastAPI"}, + ) + + +class _Executor(TaskExecutor): + name = "fake" + + def __init__( + self, *, can_handle: bool = True, result: CodegenResult | None = None + ) -> None: + self._can_handle = can_handle + self._result = result or CodegenResult(passed=True, log="mechanical ok") + self.calls = 0 + + def can_handle(self, task: Task, stack: Stack) -> bool: + return self._can_handle + + def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult: + self.calls += 1 + return self._result + + +def test_empty_registry_returns_none(tmp_path, monkeypatch): + monkeypatch.setattr(codegen_registry, "_DEFAULT_EXECUTORS", []) + assert codegen_registry.try_execute(_task(), tmp_path, _stack(tmp_path)) is None + + +def test_registry_executes_first_matching_executor(tmp_path): + skipped = _Executor(can_handle=False) + matched = _Executor(result=CodegenResult(passed=True, log="matched")) + result = codegen_registry.try_execute( + _task(), tmp_path, _stack(tmp_path), [skipped, matched] + ) + assert result is not None + assert result.log == "matched" + assert skipped.calls == 0 + assert matched.calls == 1 + + +def test_successful_codegen_runs_without_model(tmp_path, monkeypatch): + executor = _Executor( + 
result=CodegenResult( + passed=True, files_modified=[tmp_path / "src/app.py"], log="done" + ) + ) + monkeypatch.setattr(codegen_registry, "_DEFAULT_EXECUTORS", [executor]) + monkeypatch.delenv("SIMPLICIO_MODEL", raising=False) + result = _execute_one_task(_task(), tmp_path, _stack(tmp_path)) + assert result.passed is True + assert result.skipped_reason is None + assert "done" in result.log_tail + assert "files_modified" in result.log_tail + + +def test_codegen_failure_without_fallback_does_not_call_llm(tmp_path, monkeypatch): + executor = _Executor(result=CodegenResult(passed=False, log="missing class")) + monkeypatch.setattr(codegen_registry, "_DEFAULT_EXECUTORS", [executor]) + monkeypatch.setenv("SIMPLICIO_MODEL", "fake-model") + result = _execute_one_task(_task(), tmp_path, _stack(tmp_path)) + assert result.passed is False + assert result.skipped_reason is None + assert result.log_tail == "missing class" + + +def test_codegen_fallback_preserves_existing_stub_mode(tmp_path, monkeypatch): + executor = _Executor( + result=CodegenResult( + passed=False, log="shape unsupported", fallback_to_llm=True + ) + ) + monkeypatch.setattr(codegen_registry, "_DEFAULT_EXECUTORS", [executor]) + monkeypatch.delenv("SIMPLICIO_MODEL", raising=False) + result = _execute_one_task(_task(), tmp_path, _stack(tmp_path)) + assert result.passed is False + assert result.skipped_reason == "no SIMPLICIO_MODEL set; task generation skipped" + assert "codegen fallback: shape unsupported" in result.log_tail From 7a459e9bcd39d8c7018e95f4e75b18cd48ed2216 Mon Sep 17 00:00:00 2001 From: Wesley Simplicio Date: Sat, 30 May 2026 04:21:38 -0300 Subject: [PATCH 2/3] feat: add deterministic scratch codegen --- pyproject.toml | 1 + simplicio/scratch/cli.py | 223 +++++--- simplicio/scratch/codegen/__init__.py | 10 + simplicio/scratch/codegen/python_cst.py | 149 +++++ simplicio/scratch/codegen/python_fastapi.py | 215 +++++++ simplicio/scratch/codegen/python_orm.py | 207 +++++++ 
simplicio/scratch/codegen/python_pydantic.py | 524 ++++++++++++++++++ simplicio/scratch/codegen/python_pytest.py | 405 ++++++++++++++ simplicio/scratch/codegen/registry.py | 17 +- simplicio/scratch/codegen/types.py | 1 + .../scratch/codegen/typescript_next_route.py | 334 +++++++++++ simplicio/scratch/executor.py | 150 +++-- simplicio/scratch/plan_schema.py | 45 +- simplicio/scratch/skill_opt.py | 49 +- simplicio/scratch/stack_registry.py | 31 +- simplicio/templates/stacks/go-gin/README.md | 33 ++ .../templates/stacks/go-gin/practices.md | 28 + simplicio/templates/stacks/go-gin/stack.json | 13 + .../templates/stacks/go-gin/tree/README.md | 10 + .../stacks/go-gin/tree/cmd/server/main.go | 14 + simplicio/templates/stacks/go-gin/tree/go.mod | 5 + .../go-gin/tree/internal/http/router.go | 18 + .../go-gin/tree/internal/http/router_test.go | 22 + simplicio/templates/stacks/go-gin/verify.json | 6 + .../stacks/ts-nextjs/tree/package.json | 1 + tests/python/test_scratch.py | 80 ++- tests/python/test_scratch_codegen_fastapi.py | 101 ++++ .../python/test_scratch_codegen_next_route.py | 189 +++++++ tests/python/test_scratch_codegen_orm.py | 94 ++++ tests/python/test_scratch_codegen_pydantic.py | 157 ++++++ tests/python/test_scratch_codegen_pytest.py | 108 ++++ 31 files changed, 3085 insertions(+), 155 deletions(-) create mode 100644 simplicio/scratch/codegen/python_cst.py create mode 100644 simplicio/scratch/codegen/python_fastapi.py create mode 100644 simplicio/scratch/codegen/python_orm.py create mode 100644 simplicio/scratch/codegen/python_pydantic.py create mode 100644 simplicio/scratch/codegen/python_pytest.py create mode 100644 simplicio/scratch/codegen/typescript_next_route.py create mode 100644 simplicio/templates/stacks/go-gin/README.md create mode 100644 simplicio/templates/stacks/go-gin/practices.md create mode 100644 simplicio/templates/stacks/go-gin/stack.json create mode 100644 simplicio/templates/stacks/go-gin/tree/README.md create mode 100644 
simplicio/templates/stacks/go-gin/tree/cmd/server/main.go create mode 100644 simplicio/templates/stacks/go-gin/tree/go.mod create mode 100644 simplicio/templates/stacks/go-gin/tree/internal/http/router.go create mode 100644 simplicio/templates/stacks/go-gin/tree/internal/http/router_test.go create mode 100644 simplicio/templates/stacks/go-gin/verify.json create mode 100644 tests/python/test_scratch_codegen_fastapi.py create mode 100644 tests/python/test_scratch_codegen_next_route.py create mode 100644 tests/python/test_scratch_codegen_orm.py create mode 100644 tests/python/test_scratch_codegen_pydantic.py create mode 100644 tests/python/test_scratch_codegen_pytest.py diff --git a/pyproject.toml b/pyproject.toml index 0fbc2b0..79bb5d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "httpx>=0.27", "orjson>=3.10", "diskcache>=5.6", + "libcst>=1.8", ] [project.optional-dependencies] diff --git a/simplicio/scratch/cli.py b/simplicio/scratch/cli.py index a67ec04..9ee86be 100644 --- a/simplicio/scratch/cli.py +++ b/simplicio/scratch/cli.py @@ -8,6 +8,7 @@ simplicio scratch --show-stack simplicio scratch --plan-only "" --stack """ + from __future__ import annotations import argparse @@ -24,40 +25,86 @@ def _add_scratch_args(p: argparse.ArgumentParser) -> None: - p.add_argument("goal", nargs="?", default=None, - help="one-line description of the project to create") - p.add_argument("--stack", default=None, - help="explicit stack slug (e.g. 
py-fastapi); inferred if omitted") - p.add_argument("--name", default=None, - help="project directory name (derived from goal if omitted)") - p.add_argument("--dest", default=".", - help="parent directory where the project dir is created (default: cwd)") - p.add_argument("--planner", default=None, - help="override SIMPLICIO_PLANNER for this run only") - p.add_argument("--plan-only", action="store_true", - help="run planner and print the validated plan; skip execution") - p.add_argument("--skip-install", action="store_true", - help="skip package-manager install after scaffolding") - p.add_argument("--list-stacks", action="store_true", - help="print available stack templates") - p.add_argument("--show-stack", default=None, metavar="SLUG", - help="print readme + metadata for one stack") - p.add_argument("--list-recipes", action="store_true", - help="print registered scratch plan recipes") - p.add_argument("--slot", action="append", default=[], metavar="KEY=VALUE", - help="fill a required recipe slot; may be repeated") - p.add_argument("--json", action="store_true", - help="emit machine-readable JSON output where applicable") + p.add_argument( + "goal", + nargs="?", + default=None, + help="one-line description of the project to create", + ) + p.add_argument( + "--stack", + default=None, + help="explicit stack slug (e.g. 
py-fastapi); inferred if omitted", + ) + p.add_argument( + "--name", + default=None, + help="project directory name (derived from goal if omitted)", + ) + p.add_argument( + "--dest", + default=".", + help="parent directory where the project dir is created (default: cwd)", + ) + p.add_argument( + "--planner", default=None, help="override SIMPLICIO_PLANNER for this run only" + ) + p.add_argument( + "--plan-only", + action="store_true", + help="run planner and print the validated plan; skip execution", + ) + p.add_argument( + "--skip-install", + action="store_true", + help="skip package-manager install after scaffolding", + ) + p.add_argument( + "--list-stacks", action="store_true", help="print available stack templates" + ) + p.add_argument( + "--show-stack", + default=None, + metavar="SLUG", + help="print readme + metadata for one stack", + ) + p.add_argument( + "--list-recipes", + action="store_true", + help="print registered scratch plan recipes", + ) + p.add_argument( + "--slot", + action="append", + default=[], + metavar="KEY=VALUE", + help="fill a required recipe slot; may be repeated", + ) + p.add_argument( + "--json", + action="store_true", + help="emit machine-readable JSON output where applicable", + ) def _cmd_list(reg: StackRegistry, as_json: bool) -> int: stacks = reg.list() if as_json: - print(json.dumps([{ - "slug": s.slug, "language": s.language, "framework": s.framework, - "version": s.version, - "tags": s.meta.get("tags", []), - } for s in stacks], indent=2)) + print( + json.dumps( + [ + { + "slug": s.slug, + "language": s.language, + "framework": s.framework, + "version": s.version, + "tags": s.meta.get("tags", []), + } + for s in stacks + ], + indent=2, + ) + ) return 0 if not stacks: print("(no stack templates installed)") @@ -75,17 +122,27 @@ def _cmd_show(reg: StackRegistry, slug: str, as_json: bool) -> int: print(f"unknown stack: {slug}", file=sys.stderr) return 2 if as_json: - print(json.dumps({ - "slug": s.slug, "meta": s.meta, "verify": 
s.verify, - "readme": s.readme, "practices": s.practices, - }, indent=2)) + print( + json.dumps( + { + "slug": s.slug, + "meta": s.meta, + "verify": s.verify, + "readme": s.readme, + "practices": s.practices, + }, + indent=2, + ) + ) return 0 print(f"# {s.slug}") print(f"language : {s.language}") print(f"framework: {s.framework}") print(f"version : {s.version}") - print(f"verify : install={s.install_command!r} " - f"test={s.test_command!r} lint={s.lint_command!r}") + print( + f"verify : install={s.install_command!r} " + f"test={s.test_command!r} lint={s.lint_command!r}" + ) print() print("## README") print(s.readme or "(no README)") @@ -128,11 +185,7 @@ def _iter_recipes(reg: Any) -> list[Any]: def _recipe_summary(recipe: Any) -> dict[str, Any]: matches = _attr(recipe, "matches", []) or [] patterns = [getattr(pattern, "pattern", str(pattern)) for pattern in matches] - slots = ( - _attr(recipe, "slots_spec", None) - or _attr(recipe, "slots", None) - or {} - ) + slots = _attr(recipe, "slots_spec", None) or _attr(recipe, "slots", None) or {} if isinstance(slots, dict): slot_names = sorted(str(key) for key in slots) else: @@ -185,9 +238,7 @@ def _parse_slots(raw_slots: list[str]) -> dict[str, str]: if not sep or not key: raise ValueError(f"invalid --slot {raw!r}; use --slot key=value") if not value: - raise ValueError( - f"slot '{key}' requires a value; pass --slot {key}=X" - ) + raise ValueError(f"slot '{key}' requires a value; pass --slot {key}=X") slots[key] = value return slots @@ -239,20 +290,27 @@ def _infer_stack(reg: StackRegistry, goal: str) -> str | None: def _cmd_scratch(args: argparse.Namespace, reg: StackRegistry) -> int: goal = args.goal if not goal: - print("error: provide a goal, e.g. simplicio scratch \"CRUD for condo units\"", - file=sys.stderr) + print( + 'error: provide a goal, e.g. 
simplicio scratch "CRUD for condo units"', + file=sys.stderr, + ) return 2 stack_slug = args.stack or _infer_stack(reg, goal) if not stack_slug: - print("error: could not infer stack from goal; pass --stack . " - "List available with `simplicio scratch --list-stacks`.", - file=sys.stderr) + print( + "error: could not infer stack from goal; pass --stack . " + "List available with `simplicio scratch --list-stacks`.", + file=sys.stderr, + ) return 2 stack = reg.get(stack_slug) if stack is None: - print(f"error: unknown stack '{stack_slug}'. Run " - f"`simplicio scratch --list-stacks`.", file=sys.stderr) + print( + f"error: unknown stack '{stack_slug}'. Run " + f"`simplicio scratch --list-stacks`.", + file=sys.stderr, + ) return 2 try: @@ -266,8 +324,10 @@ def _cmd_scratch(args: argparse.Namespace, reg: StackRegistry) -> int: if args.planner: os.environ["SIMPLICIO_PLANNER"] = args.planner - print(f"[scratch] stack: {stack.slug} ({stack.language} / {stack.framework})", - file=sys.stderr) + print( + f"[scratch] stack: {stack.slug} ({stack.language} / {stack.framework})", + file=sys.stderr, + ) print(f"[scratch] project: {project_name}", file=sys.stderr) try: planner = planner_info() @@ -276,7 +336,7 @@ def _cmd_scratch(args: argparse.Namespace, reg: StackRegistry) -> int: print(f"[scratch] planner: {planner}", file=sys.stderr) if slots: print(f"[scratch] slots: {', '.join(sorted(slots))}", file=sys.stderr) - print(f"[scratch] generating plan...", file=sys.stderr) + print("[scratch] generating plan...", file=sys.stderr) try: plan = _generate_plan_with_slots(stack, goal, project_name, slots) @@ -288,18 +348,33 @@ def _cmd_scratch(args: argparse.Namespace, reg: StackRegistry) -> int: if args.plan_only: if args.json: - print(json.dumps({ - "version": plan.version, "stack": plan.stack, - "project_name": plan.project_name, "rationale": plan.rationale, - "tasks": [{"id": t.id, "goal": t.goal, "target": t.target, - "criteria": t.criteria, "constraints": t.constraints, - 
"verify": t.verify, "depends_on": t.depends_on} - for t in plan.tasks], - "deps_to_install": plan.deps_to_install, - "deps_dev": plan.deps_dev, - "test_command": plan.test_command, - "lint_command": plan.lint_command, - }, indent=2)) + print( + json.dumps( + { + "version": plan.version, + "stack": plan.stack, + "project_name": plan.project_name, + "rationale": plan.rationale, + "tasks": [ + { + "id": t.id, + "goal": t.goal, + "target": t.target, + "criteria": t.criteria, + "constraints": t.constraints, + "verify": t.verify, + "depends_on": t.depends_on, + } + for t in plan.tasks + ], + "deps_to_install": plan.deps_to_install, + "deps_dev": plan.deps_dev, + "test_command": plan.test_command, + "lint_command": plan.lint_command, + }, + indent=2, + ) + ) else: print(f"# scratch plan — {plan.project_name}") print(f"rationale: {plan.rationale}") @@ -312,9 +387,11 @@ def _cmd_scratch(args: argparse.Namespace, reg: StackRegistry) -> int: return 0 from .executor import execute_plan + try: - report = execute_plan(plan, stack, Path(args.dest), - skip_install=args.skip_install) + report = execute_plan( + plan, stack, Path(args.dest), skip_install=args.skip_install + ) except FileExistsError as e: print(f"[scratch] {e}", file=sys.stderr) return 4 @@ -324,10 +401,14 @@ def _cmd_scratch(args: argparse.Namespace, reg: StackRegistry) -> int: else: print(f"[scratch] done: {report.project_dir}", file=sys.stderr) print(f" files written: {len(report.files_written)}", file=sys.stderr) - print(f" install: {'ok' if report.install_ok else 'fail/skipped'}", - file=sys.stderr) - print(f" tasks : {report.tasks_passed}/{report.tasks_total} passed", - file=sys.stderr) + print( + f" install: {'ok' if report.install_ok else 'fail/skipped'}", + file=sys.stderr, + ) + print( + f" tasks : {report.tasks_passed}/{report.tasks_total} passed", + file=sys.stderr, + ) return 0 if report.tasks_passed == report.tasks_total else 1 diff --git a/simplicio/scratch/codegen/__init__.py 
b/simplicio/scratch/codegen/__init__.py index c55e134..1b851d6 100644 --- a/simplicio/scratch/codegen/__init__.py +++ b/simplicio/scratch/codegen/__init__.py @@ -1,11 +1,21 @@ """Deterministic code-generation executors for scratch tasks.""" +from .python_fastapi import PythonAddFastApiRouteExecutor +from .python_orm import PythonAddOrmFieldExecutor +from .python_pydantic import PythonAddPydanticSchemaExecutor +from .python_pytest import PythonAddPytestTestExecutor from .registry import register_executor, registered_executors, try_execute +from .typescript_next_route import TypeScriptAddNextRouteExecutor from .types import CodegenResult, TaskExecutor __all__ = [ "CodegenResult", + "PythonAddFastApiRouteExecutor", + "PythonAddOrmFieldExecutor", + "PythonAddPydanticSchemaExecutor", + "PythonAddPytestTestExecutor", "TaskExecutor", + "TypeScriptAddNextRouteExecutor", "register_executor", "registered_executors", "try_execute", diff --git a/simplicio/scratch/codegen/python_cst.py b/simplicio/scratch/codegen/python_cst.py new file mode 100644 index 0000000..101be81 --- /dev/null +++ b/simplicio/scratch/codegen/python_cst.py @@ -0,0 +1,149 @@ +"""LibCST helpers shared by deterministic Python scratch executors.""" + +from __future__ import annotations + + +class LibCSTUnavailable(RuntimeError): + """Raised when libcst is not installed in the active environment.""" + + +def _cst(): + try: + import libcst as cst + from libcst.helpers import get_full_name_for_node + except ( + ModuleNotFoundError + ) as exc: # pragma: no cover - exercised by deployments without libcst + raise LibCSTUnavailable("libcst is not installed") from exc + return cst, get_full_name_for_node + + +def format_module(source: str) -> str: + """Round-trip source through LibCST so generated Python stays concrete-syntax valid.""" + cst, _ = _cst() + return cst.parse_module(source or "\n").code + + +def ensure_from_import(source: str, module: str, name: str) -> str: + """Ensure ``from import `` exists, 
preserving existing import blocks.""" + cst, get_full_name_for_node = _cst() + + class ImportTransformer(cst.CSTTransformer): + changed = False + + def leave_ImportFrom(self, original_node, updated_node): # type: ignore[no-untyped-def] + if original_node.module is None: + return updated_node + if get_full_name_for_node(original_node.module) != module: + return updated_node + names = updated_node.names + if not isinstance(names, tuple): + return updated_node + imported = { + get_full_name_for_node(alias.name) + for alias in names + if get_full_name_for_node(alias.name) + } + if "*" in imported or name in imported: + self.changed = True + return updated_node + if names: + *head, last = names + names = ( + *head, + last.with_changes( + comma=cst.Comma(whitespace_after=cst.SimpleWhitespace(" ")) + ), + cst.ImportAlias(cst.Name(name)), + ) + else: + names = (cst.ImportAlias(cst.Name(name)),) + self.changed = True + return updated_node.with_changes(names=names) + + tree = cst.parse_module(source or "\n") + transformer = ImportTransformer() + updated = tree.visit(transformer) + if transformer.changed: + return updated.code + + import_stmt = cst.parse_statement(f"from {module} import {name}\n") + body = list(updated.body) + insert_at = _import_insert_index(body, cst) + body.insert(insert_at, import_stmt) + return updated.with_changes(body=body).code + + +def append_statement_to_class( + source: str, class_name: str, statement: str +) -> str | None: + """Append a concrete statement to a class body using LibCST.""" + cst, _ = _cst() + + class ClassAppender(cst.CSTTransformer): + changed = False + + def leave_ClassDef(self, original_node, updated_node): # type: ignore[no-untyped-def] + if original_node.name.value != class_name: + return updated_node + if not isinstance(updated_node.body, cst.IndentedBlock): + return updated_node + next_statement = cst.parse_statement(statement) + body = [item for item in updated_node.body.body if not _is_pass(item, cst)] + 
body.append(next_statement) + self.changed = True + return updated_node.with_changes( + body=updated_node.body.with_changes(body=tuple(body)) + ) + + tree = cst.parse_module(source or "\n") + transformer = ClassAppender() + updated = tree.visit(transformer) + return updated.code if transformer.changed else None + + +def append_module_statements(source: str, statements: str) -> str: + """Append one or more top-level statements to a module using LibCST.""" + cst, _ = _cst() + tree = cst.parse_module(source or "\n") + additions = list(cst.parse_module(statements).body) + body = [*tree.body, *additions] + return tree.with_changes(body=body).code + + +def insert_module_statement_after_imports(source: str, statement: str) -> str: + """Insert a top-level statement immediately after imports/docstring.""" + cst, _ = _cst() + tree = cst.parse_module(source or "\n") + body = list(tree.body) + body.insert(_import_insert_index(body, cst), cst.parse_statement(statement)) + return tree.with_changes(body=body).code + + +def _import_insert_index(body, cst) -> int: # type: ignore[no-untyped-def] + insert_at = 0 + for index, statement in enumerate(body): + if _is_docstring(statement, cst) or _is_import(statement, cst): + insert_at = index + 1 + continue + break + return insert_at + + +def _is_docstring(statement, cst) -> bool: # type: ignore[no-untyped-def] + if not isinstance(statement, cst.SimpleStatementLine) or len(statement.body) != 1: + return False + expr = statement.body[0] + return isinstance(expr, cst.Expr) and isinstance(expr.value, cst.SimpleString) + + +def _is_import(statement, cst) -> bool: # type: ignore[no-untyped-def] + return isinstance(statement, cst.SimpleStatementLine) and any( + isinstance(item, (cst.Import, cst.ImportFrom)) for item in statement.body + ) + + +def _is_pass(statement, cst) -> bool: # type: ignore[no-untyped-def] + return isinstance(statement, cst.SimpleStatementLine) and any( + isinstance(item, cst.Pass) for item in statement.body + ) diff --git 
a/simplicio/scratch/codegen/python_fastapi.py b/simplicio/scratch/codegen/python_fastapi.py new file mode 100644 index 0000000..afe07d4 --- /dev/null +++ b/simplicio/scratch/codegen/python_fastapi.py @@ -0,0 +1,215 @@ +"""Deterministic FastAPI route edits for scratch tasks.""" + +from __future__ import annotations + +import ast +import re +from dataclasses import dataclass +from pathlib import Path + +from ..plan_schema import Task +from ..stack_registry import Stack +from .python_cst import ( + LibCSTUnavailable, + append_module_statements, + ensure_from_import, + insert_module_statement_after_imports, +) +from .types import CodegenResult, TaskExecutor + + +@dataclass(frozen=True) +class _RouteSpec: + method: str + path: str + function_name: str + parameters: list[str] + + +class PythonAddFastApiRouteExecutor(TaskExecutor): + """Add one small FastAPI route handler to an existing API module.""" + + name = "python-add-fastapi-route" + + def can_handle(self, task: Task, stack: Stack) -> bool: + if not _is_fastapi_stack(stack): + return False + target = task.target.replace("\\", "/").lower() + if not target.endswith(".py") or "/src/api/" not in f"/{target}": + return False + text = _task_text(task).lower() + return "endpoint" in text or "route" in text or "router" in text + + def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult: + spec = _parse_route_spec(task) + if spec is None: + return _fallback("unsupported FastAPI route task shape") + + target = project_dir / task.target + if not target.is_file(): + return _fallback(f"target file not found: {task.target}") + + original = target.read_text(encoding="utf-8") + try: + tree = ast.parse(original or "\n") + except SyntaxError as exc: + return _fallback(f"target is not valid Python: {exc.msg}") + + if _route_exists(tree, spec): + return CodegenResult( + passed=True, + files_modified=[], + log=f"{spec.method.upper()} {spec.path} already exists; no changes needed", + ) + + newline = 
_detect_newline(original) + try: + updated = ensure_from_import(original, "fastapi", "APIRouter") + if not _has_router_assignment(ast.parse(updated or "\n")): + updated = insert_module_statement_after_imports( + updated, + f"router = APIRouter(){newline}", + ) + updated = append_module_statements(updated, _render_route(spec, newline)) + except LibCSTUnavailable as exc: + return _fallback(str(exc)) + + target.write_text(updated, encoding="utf-8") + return CodegenResult( + passed=True, + files_modified=[target], + log=f"added FastAPI {spec.method.upper()} {spec.path} route with libcst", + ) + + +def _is_fastapi_stack(stack: Stack) -> bool: + text = f"{stack.slug} {stack.language} {stack.framework}".lower() + return "fastapi" in text or stack.slug.startswith("py-") + + +def _task_text(task: Task) -> str: + return "\n".join([task.goal, task.criteria, task.constraints]) + + +def _parse_route_spec(task: Task) -> _RouteSpec | None: + text = _task_text(task) + method = _parse_method(text) + path = _parse_path(text) + if path is None: + return None + return _RouteSpec( + method=method, + path=path, + function_name=_function_name(method, path, task.target), + parameters=_path_parameters(path), + ) + + +def _parse_method(text: str) -> str: + match = re.search(r"\b(GET|POST|PUT|PATCH|DELETE)\b", text, re.IGNORECASE) + return match.group(1).lower() if match else "get" + + +def _parse_path(text: str) -> str | None: + for pattern in [ + r"`(/[^`]+)`", + r"['\"](/[^'\"]+)['\"]", + r"\b(GET|POST|PUT|PATCH|DELETE)\s+(/[^\s`'\"]+)", + ]: + match = re.search(pattern, text, re.IGNORECASE) + if match: + value = match.group(match.lastindex or 1) + if value.startswith("/"): + return value.rstrip(".,") + return None + + +def _path_parameters(path: str) -> list[str]: + return re.findall(r"{([A-Za-z_][A-Za-z0-9_]*)}", path) + + +def _function_name(method: str, path: str, target: str) -> str: + resource = next( + (part for part in path.split("/") if part and not part.startswith("{")), "" + ) 
+ if not resource: + resource = Path(target).stem + name = re.sub(r"[^A-Za-z0-9_]+", "_", resource).strip("_").lower() or "resource" + prefix = { + "get": "get", + "post": "create", + "put": "update", + "patch": "update", + "delete": "delete", + }.get(method, method) + return f"{prefix}_{_singular(name)}" + + +def _singular(name: str) -> str: + return name[:-1] if name.endswith("s") and len(name) > 1 else name + + +def _route_exists(tree: ast.AST, spec: _RouteSpec) -> bool: + for node in ast.walk(tree): + if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + continue + for decorator in node.decorator_list: + if not isinstance(decorator, ast.Call): + continue + if _decorator_method(decorator.func) != spec.method: + continue + if decorator.args and _literal_string(decorator.args[0]) == spec.path: + return True + return False + + +def _decorator_method(node: ast.AST) -> str | None: + if isinstance(node, ast.Attribute) and _name(node.value) == "router": + return node.attr.lower() + return None + + +def _literal_string(node: ast.AST) -> str | None: + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + return None + + +def _name(node: ast.AST) -> str | None: + if isinstance(node, ast.Name): + return node.id + return None + + +def _has_router_assignment(tree: ast.AST) -> bool: + for node in ast.walk(tree): + if isinstance(node, ast.Assign): + if any(_name(target) == "router" for target in node.targets): + return True + if isinstance(node, ast.AnnAssign) and _name(node.target) == "router": + return True + return False + + +def _render_route(spec: _RouteSpec, newline: str) -> str: + params = ", ".join(f"{param}: str" for param in spec.parameters) + body = ( + [f' return {{"{spec.parameters[0]}": {spec.parameters[0]}}}{newline}'] + if spec.parameters + else [f' return {{"status": "ok"}}{newline}'] + ) + return "".join( + [ + f'{newline}@router.{spec.method}("{spec.path}"){newline}', + f"async def 
{spec.function_name}({params}) -> dict[str, str]:{newline}", + *body, + ] + ) + + +def _detect_newline(text: str) -> str: + return "\r\n" if "\r\n" in text else "\n" + + +def _fallback(log: str) -> CodegenResult: + return CodegenResult(passed=False, log=log, fallback_to_llm=True) diff --git a/simplicio/scratch/codegen/python_orm.py b/simplicio/scratch/codegen/python_orm.py new file mode 100644 index 0000000..7ce7143 --- /dev/null +++ b/simplicio/scratch/codegen/python_orm.py @@ -0,0 +1,207 @@ +"""Deterministic SQLAlchemy ORM model edits for scratch tasks.""" + +from __future__ import annotations + +import ast +import re +from dataclasses import dataclass +from pathlib import Path + +from ..plan_schema import Task +from ..stack_registry import Stack +from .python_cst import LibCSTUnavailable, append_statement_to_class, ensure_from_import +from .types import CodegenResult, TaskExecutor + + +@dataclass(frozen=True) +class _FieldSpec: + model_name: str + field_name: str + mapped_type: str + + +class PythonAddOrmFieldExecutor(TaskExecutor): + """Add one SQLAlchemy 2.0 ``Mapped[...]`` field to an existing model.""" + + name = "python-add-orm-field" + + def can_handle(self, task: Task, stack: Stack) -> bool: + if not _is_python_stack(stack): + return False + target = task.target.replace("\\", "/").lower() + if not target.endswith(".py"): + return False + if "/src/db/" not in f"/{target}" and "model" not in Path(target).stem: + return False + text = _task_text(task).lower() + return ("field" in text or "column" in text) and ( + "model" in text or "orm" in text or "sqlalchemy" in text + ) + + def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult: + spec = _parse_field_spec(task) + if spec is None: + return _fallback("unsupported ORM field task shape") + + target = project_dir / task.target + if not target.is_file(): + return _fallback(f"target file not found: {task.target}") + + original = target.read_text(encoding="utf-8") + try: + tree = 
ast.parse(original) + except SyntaxError as exc: + return _fallback(f"target is not valid Python: {exc.msg}") + + model = _find_model_class(tree, spec.model_name) + if model is None: + return _fallback(f"could not find SQLAlchemy model class {spec.model_name}") + if not _looks_like_sqlalchemy_model(model): + return _fallback(f"class {spec.model_name} is not a SQLAlchemy model") + if _class_has_field(model, spec.field_name): + return CodegenResult( + passed=True, + files_modified=[], + log=( + f"{spec.model_name}.{spec.field_name} already exists; " + "no changes needed" + ), + ) + + try: + updated = ensure_from_import(original, "sqlalchemy.orm", "Mapped") + updated = append_statement_to_class( + updated, + spec.model_name, + f"{spec.field_name}: Mapped[{spec.mapped_type}]\n", + ) + except LibCSTUnavailable as exc: + return _fallback(str(exc)) + if updated is None: + return _fallback(f"could not update class {spec.model_name} with LibCST") + + target.write_text(updated, encoding="utf-8") + return CodegenResult( + passed=True, + files_modified=[target], + log=( + f"added {spec.model_name}.{spec.field_name}: Mapped[{spec.mapped_type}] with libcst" + ), + ) + + +def _is_python_stack(stack: Stack) -> bool: + text = f"{stack.slug} {stack.language}".lower() + return "python" in text or stack.slug.startswith("py-") + + +def _task_text(task: Task) -> str: + return "\n".join([task.goal, task.criteria, task.constraints]) + + +def _parse_field_spec(task: Task) -> _FieldSpec | None: + text = _task_text(task) + model_name = _parse_model_name(text) + field_name, mapped_type = _parse_field(text) + if not model_name or not field_name or not mapped_type: + return None + return _FieldSpec( + model_name=model_name, + field_name=field_name, + mapped_type=mapped_type, + ) + + +def _parse_model_name(text: str) -> str | None: + patterns = [ + r"\bclass\s+([A-Z][A-Za-z0-9_]*)\b", + r"`([A-Z][A-Za-z0-9_]*)`\s+(?:ORM\s+)?model\b", + r"\b([A-Z][A-Za-z0-9_]*)\s+(?:ORM\s+)?model\b", + 
r"\b(?:to|on|in)\s+(?:the\s+)?([A-Z][A-Za-z0-9_]*)\b", + ] + for pattern in patterns: + match = re.search(pattern, text) + if match: + return match.group(1) + return None + + +def _parse_field(text: str) -> tuple[str | None, str | None]: + explicit = re.search( + r"\b([a-z_][A-Za-z0-9_]*)\s*:\s*Mapped\[([A-Za-z0-9_., \[\]\"']+)\]", + text, + ) + if explicit: + return explicit.group(1), explicit.group(2).strip() + + name_patterns = [ + r"\badd\s+(?:the\s+)?`?([a-z_][A-Za-z0-9_]*)`?\s+(?:field|column)\b", + r"\b(?:field|column)\s+`?([a-z_][A-Za-z0-9_]*)`?\b", + ] + field_name = None + for pattern in name_patterns: + match = re.search(pattern, text, re.IGNORECASE) + if match: + field_name = match.group(1) + break + + if field_name is None: + return None, None + + lowered = text.lower() + if "mapped[str]" in lowered or " string" in lowered or field_name == "email": + return field_name, "str" + if "mapped[int]" in lowered or " integer" in lowered: + return field_name, "int" + if "mapped[bool]" in lowered or " boolean" in lowered: + return field_name, "bool" + return None, None + + +def _find_model_class(tree: ast.AST, model_name: str) -> ast.ClassDef | None: + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef) and node.name == model_name: + return node + return None + + +def _looks_like_sqlalchemy_model(node: ast.ClassDef) -> bool: + if any(_base_name(base) in {"Base", "DeclarativeBase"} for base in node.bases): + return True + for item in node.body: + if isinstance(item, ast.Assign): + if any(_target_name(target) == "__tablename__" for target in item.targets): + return True + if isinstance(item, ast.AnnAssign): + if _target_name(item.target) == "__tablename__": + return True + return False + + +def _base_name(node: ast.AST) -> str | None: + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + return node.attr + return None + + +def _target_name(node: ast.AST) -> str | None: + if isinstance(node, ast.Name): + return 
node.id + return None + + +def _class_has_field(node: ast.ClassDef, field_name: str) -> bool: + for item in node.body: + if isinstance(item, ast.AnnAssign): + if _target_name(item.target) == field_name: + return True + if isinstance(item, ast.Assign): + if any(_target_name(target) == field_name for target in item.targets): + return True + return False + + +def _fallback(log: str) -> CodegenResult: + return CodegenResult(passed=False, log=log, fallback_to_llm=True) diff --git a/simplicio/scratch/codegen/python_pydantic.py b/simplicio/scratch/codegen/python_pydantic.py new file mode 100644 index 0000000..2f1ea48 --- /dev/null +++ b/simplicio/scratch/codegen/python_pydantic.py @@ -0,0 +1,524 @@ +"""Deterministic Pydantic schema generation for scratch tasks.""" + +from __future__ import annotations + +import ast +import re +from dataclasses import dataclass +from pathlib import Path + +from ..plan_schema import Task +from ..stack_registry import Stack +from .python_cst import LibCSTUnavailable, format_module +from .types import CodegenResult, TaskExecutor + + +_AUTO_INPUT_FIELDS = {"id", "created_at", "updated_at", "created_on", "updated_on"} +_TYPE_IMPORTS = { + "date": ("datetime", "date"), + "datetime": ("datetime", "datetime"), + "Decimal": ("decimal", "Decimal"), + "UUID": ("uuid", "UUID"), +} + + +@dataclass(frozen=True) +class _SchemaSpec: + model_name: str + model_path: Path + + +@dataclass(frozen=True) +class _ModelField: + name: str + type_text: str + primary_key: bool = False + has_default: bool = False + nullable: bool = False + generated_input: bool = False + + +class PythonAddPydanticSchemaExecutor(TaskExecutor): + """Generate ``XCreate``, ``XUpdate``, and ``XRead`` Pydantic schemas.""" + + name = "python-add-pydantic-schema" + + def can_handle(self, task: Task, stack: Stack) -> bool: + if not _is_fastapi_stack(stack): + return False + target = task.target.replace("\\", "/").lower() + if not target.endswith(".py"): + return False + if ( + 
"/src/api/schemas/" not in f"/{target}" + and "schema" not in Path(target).stem + ): + return False + text = _task_text(task).lower() + return "pydantic" in text or "schema" in text + + def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult: + spec = _parse_schema_spec(task, project_dir) + if spec is None: + return _fallback("unsupported Pydantic schema task shape") + + target = project_dir / task.target + if target.exists() and not target.is_file(): + return _fallback(f"target is not a file: {task.target}") + + fields = _extract_model_fields(spec.model_path, spec.model_name) + if not fields: + return _fallback(f"could not derive fields from {spec.model_name} model") + + original = target.read_text(encoding="utf-8") if target.exists() else "" + try: + tree = ast.parse(original or "\n") + except SyntaxError as exc: + return _fallback(f"target schema file is not valid Python: {exc.msg}") + + classes = _schema_class_names(spec.model_name) + existing = _module_class_names(tree) + missing = [name for name in classes if name not in existing] + if not missing: + return CodegenResult( + passed=True, + files_modified=[], + log=( + f"{spec.model_name} Pydantic schemas already exist; " + "no changes needed" + ), + ) + + try: + updated = format_module( + _render_updated_schema_module( + original=original, + tree=tree, + model_name=spec.model_name, + fields=fields, + class_names=missing, + ) + ) + except LibCSTUnavailable as exc: + return _fallback(str(exc)) + try: + ast.parse(updated) + except SyntaxError as exc: + return _fallback( + f"generated Pydantic schemas are not valid Python: {exc.msg}" + ) + + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(updated, encoding="utf-8") + return CodegenResult( + passed=True, + files_modified=[target], + log=( + f"generated Pydantic schemas with libcst for {spec.model_name} from " + f"{spec.model_path.relative_to(project_dir)}" + ), + ) + + +def _is_fastapi_stack(stack: Stack) -> bool: + text 
= f"{stack.slug} {stack.language} {stack.framework}".lower() + return "fastapi" in text or stack.slug.startswith("py-") + + +def _task_text(task: Task) -> str: + return "\n".join([task.goal, task.criteria, task.constraints]) + + +def _parse_schema_spec(task: Task, project_dir: Path) -> _SchemaSpec | None: + model_name = _parse_model_name(_task_text(task), task.target) + if model_name is None: + return None + model_path = _find_model_path(project_dir, model_name, task.target) + if model_path is None: + return None + return _SchemaSpec(model_name=model_name, model_path=model_path) + + +def _parse_model_name(text: str, target: str) -> str | None: + patterns = [ + r"\b([A-Z][A-Za-z0-9_]*)\s*(?:Create|Update|Read)\b", + r"\bschemas?\s+for\s+(?:the\s+)?([A-Z][A-Za-z0-9_]*)\b", + r"\b([A-Z][A-Za-z0-9_]*)\s+schemas?\b", + r"\b(?:for|of)\s+(?:the\s+)?([A-Z][A-Za-z0-9_]*)\s+(?:model|resource)\b", + ] + for pattern in patterns: + match = re.search(pattern, text) + if match: + return match.group(1) + return _pascal_case(Path(target).stem) + + +def _pascal_case(value: str) -> str | None: + cleaned = re.sub(r"[^A-Za-z0-9]+", "_", value).strip("_") + if not cleaned: + return None + if cleaned.endswith("ies") and len(cleaned) > 3: + cleaned = f"{cleaned[:-3]}y" + elif cleaned.endswith("s") and len(cleaned) > 1: + cleaned = cleaned[:-1] + parts = [part for part in cleaned.split("_") if part] + return "".join(part[:1].upper() + part[1:] for part in parts) + + +def _snake_case(value: str) -> str: + value = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", value) + value = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", value) + return value.lower() + + +def _find_model_path(project_dir: Path, model_name: str, target: str) -> Path | None: + stem = _snake_case(model_name) + target_stem = Path(target).stem + candidates = [ + project_dir / "src" / "db" / f"{stem}.py", + project_dir / "src" / "db" / f"{target_stem}.py", + project_dir / "src" / "db" / "models.py", + ] + db_root = project_dir / "src" / "db" + 
if db_root.is_dir(): + candidates.extend(sorted(db_root.rglob("*.py"))) + + seen: set[Path] = set() + for path in candidates: + if path in seen or not path.is_file(): + continue + seen.add(path) + if _has_sqlalchemy_model(path, model_name): + return path + return None + + +def _has_sqlalchemy_model(path: Path, model_name: str) -> bool: + try: + tree = ast.parse(path.read_text(encoding="utf-8")) + except (OSError, SyntaxError): + return False + model = _find_class(tree, model_name) + return model is not None and _looks_like_sqlalchemy_model(model) + + +def _extract_model_fields(path: Path, model_name: str) -> list[_ModelField]: + try: + tree = ast.parse(path.read_text(encoding="utf-8")) + except (OSError, SyntaxError): + return [] + model = _find_class(tree, model_name) + if model is None or not _looks_like_sqlalchemy_model(model): + return [] + + fields: list[_ModelField] = [] + for item in model.body: + if not isinstance(item, ast.AnnAssign): + continue + field = _field_from_annassign(item) + if field is not None: + fields.append(field) + return fields + + +def _field_from_annassign(node: ast.AnnAssign) -> _ModelField | None: + name = _target_name(node.target) + if name is None or name.startswith("_") or name == "__tablename__": + return None + if _is_relationship_assignment(node.value): + return None + type_text = _mapped_type_text(node.annotation) + if type_text is None or type_text.startswith("ClassVar["): + return None + + primary_key = _call_kw_is_true(node.value, "primary_key") + nullable = _call_kw_is_true(node.value, "nullable") or _is_optional_type(type_text) + if nullable: + type_text = _optionalize(type_text) + return _ModelField( + name=name, + type_text=type_text, + primary_key=primary_key, + has_default=_has_default(node.value), + nullable=nullable, + generated_input=primary_key or name in _AUTO_INPUT_FIELDS, + ) + + +def _mapped_type_text(annotation: ast.expr) -> str | None: + if isinstance(annotation, ast.Subscript) and _qualified_name( + 
annotation.value + ).endswith("Mapped"): + return _normalize_type(ast.unparse(annotation.slice)) + return None + + +def _normalize_type(type_text: str) -> str: + text = type_text.strip().replace("typing.", "") + optional = re.fullmatch(r"Optional\[(.+)\]", text) + if optional: + return _optionalize(_normalize_type(optional.group(1))) + union = re.fullmatch(r"Union\[(.+),\s*None\]", text) + if union: + return _optionalize(_normalize_type(union.group(1))) + union_none_first = re.fullmatch(r"Union\[None,\s*(.+)\]", text) + if union_none_first: + return _optionalize(_normalize_type(union_none_first.group(1))) + return text + + +def _find_class(tree: ast.AST, class_name: str) -> ast.ClassDef | None: + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef) and node.name == class_name: + return node + return None + + +def _looks_like_sqlalchemy_model(node: ast.ClassDef) -> bool: + if any( + _qualified_name(base).split(".")[-1] in {"Base", "DeclarativeBase"} + for base in node.bases + ): + return True + for item in node.body: + if isinstance(item, ast.Assign): + if any(_target_name(target) == "__tablename__" for target in item.targets): + return True + if ( + isinstance(item, ast.AnnAssign) + and _target_name(item.target) == "__tablename__" + ): + return True + return False + + +def _target_name(node: ast.AST) -> str | None: + if isinstance(node, ast.Name): + return node.id + return None + + +def _qualified_name(node: ast.AST | None) -> str: + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + parent = _qualified_name(node.value) + return f"{parent}.{node.attr}" if parent else node.attr + return "" + + +def _is_relationship_assignment(node: ast.AST | None) -> bool: + if not isinstance(node, ast.Call): + return False + return _qualified_name(node.func).split(".")[-1] == "relationship" + + +def _call_kw_is_true(node: ast.AST | None, name: str) -> bool: + if not isinstance(node, ast.Call): + return False + for keyword in 
node.keywords: + if keyword.arg == name and isinstance(keyword.value, ast.Constant): + return keyword.value.value is True + return False + + +def _has_default(node: ast.AST | None) -> bool: + if node is None: + return False + if not isinstance(node, ast.Call): + return True + default_keywords = {"default", "default_factory", "server_default"} + return any(keyword.arg in default_keywords for keyword in node.keywords) + + +def _is_optional_type(type_text: str) -> bool: + return ( + type_text.endswith("| None") + or type_text.startswith("None |") + or " | None | " in type_text + ) + + +def _optionalize(type_text: str) -> str: + if _is_optional_type(type_text): + return type_text + return f"{type_text} | None" + + +def _schema_class_names(model_name: str) -> list[str]: + return [f"{model_name}Create", f"{model_name}Update", f"{model_name}Read"] + + +def _module_class_names(tree: ast.AST) -> set[str]: + return { + node.name + for node in getattr(tree, "body", []) + if isinstance(node, ast.ClassDef) + } + + +def _render_updated_schema_module( + *, + original: str, + tree: ast.AST, + model_name: str, + fields: list[_ModelField], + class_names: list[str], +) -> str: + classes = _render_schema_classes(model_name, fields, class_names) + if not original.strip(): + return _render_new_schema_module(fields, classes) + + newline = _detect_newline(original) + lines = original.splitlines(keepends=True) + _ensure_pydantic_import(lines, tree, newline) + tree = ast.parse("".join(lines) or "\n") + _ensure_type_imports(lines, tree, fields, newline) + updated = "".join(lines).rstrip() + return f"{updated}{newline}{newline}{classes}" + + +def _render_new_schema_module(fields: list[_ModelField], classes: str) -> str: + imports = _render_imports(fields) + return f"from __future__ import annotations\n\n{imports}\n\n\n{classes}" + + +def _render_imports(fields: list[_ModelField]) -> str: + imports = [] + type_imports = _needed_type_imports(fields) + for module in sorted(type_imports): + names 
= ", ".join(sorted(type_imports[module])) + imports.append(f"from {module} import {names}") + imports.append("from pydantic import BaseModel, ConfigDict") + return "\n".join(imports) + + +def _needed_type_imports(fields: list[_ModelField]) -> dict[str, set[str]]: + imports: dict[str, set[str]] = {} + pattern = r"\b[A-Z][A-Za-z0-9_]*\b|\bdate\b|\bdatetime\b" + for field in fields: + names = set(re.findall(pattern, field.type_text)) + for name in names: + item = _TYPE_IMPORTS.get(name) + if item is None: + continue + module, import_name = item + imports.setdefault(module, set()).add(import_name) + return imports + + +def _render_schema_classes( + model_name: str, + fields: list[_ModelField], + class_names: list[str], +) -> str: + blocks = [] + for class_name in class_names: + suffix = class_name.removeprefix(model_name) + if suffix == "Create": + blocks.append( + _render_model_class(class_name, _input_fields(fields), "create") + ) + elif suffix == "Update": + blocks.append( + _render_model_class(class_name, _input_fields(fields), "update") + ) + elif suffix == "Read": + blocks.append(_render_model_class(class_name, fields, "read")) + return "\n\n\n".join(blocks) + "\n" + + +def _input_fields(fields: list[_ModelField]) -> list[_ModelField]: + return [field for field in fields if not field.generated_input] + + +def _render_model_class(class_name: str, fields: list[_ModelField], mode: str) -> str: + lines = [f"class {class_name}(BaseModel):"] + if mode == "read": + lines.append(" model_config = ConfigDict(from_attributes=True)") + if fields: + lines.append("") + if not fields: + lines.append(" pass") + return "\n".join(lines) + for field in fields: + lines.append(_render_field(field, mode)) + return "\n".join(lines) + + +def _render_field(field: _ModelField, mode: str) -> str: + if mode == "update": + return f" {field.name}: {_optionalize(field.type_text)} = None" + if mode == "create" and (field.nullable or field.has_default): + return f" {field.name}: 
{_optionalize(field.type_text)} = None" + return f" {field.name}: {field.type_text}" + + +def _ensure_pydantic_import(lines: list[str], tree: ast.AST, newline: str) -> None: + _ensure_from_import(lines, tree, "pydantic", ["BaseModel", "ConfigDict"], newline) + + +def _ensure_type_imports( + lines: list[str], tree: ast.AST, fields: list[_ModelField], newline: str +) -> None: + for module, names in _needed_type_imports(fields).items(): + _ensure_from_import(lines, tree, module, sorted(names), newline) + tree = ast.parse("".join(lines) or "\n") + + +def _ensure_from_import( + lines: list[str], + tree: ast.AST, + module: str, + names: list[str], + newline: str, +) -> None: + missing = set(names) + insert_at = 0 + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.Expr) and isinstance(node.value, ast.Constant): + insert_at = max(insert_at, getattr(node, "end_lineno", node.lineno)) + continue + if isinstance(node, ast.ImportFrom) and node.module == "__future__": + insert_at = max(insert_at, getattr(node, "end_lineno", node.lineno)) + continue + if isinstance(node, (ast.Import, ast.ImportFrom)): + insert_at = max(insert_at, getattr(node, "end_lineno", node.lineno)) + if not ( + isinstance(node, ast.ImportFrom) + and node.level == 0 + and node.module == module + ): + continue + if any(alias.name == "*" for alias in node.names): + return + imported = {alias.name for alias in node.names} + missing -= imported + if not missing: + return + if node.lineno == getattr(node, "end_lineno", node.lineno): + _add_names_to_import_line(lines, node.lineno - 1, module, sorted(missing)) + return + lines.insert(insert_at, f"from {module} import {', '.join(names)}{newline}") + + +def _add_names_to_import_line( + lines: list[str], index: int, module: str, names: list[str] +) -> None: + escaped = re.escape(module) + match = re.match( + rf"^(\s*from\s+{escaped}\s+import\s+)(.*?)(\s*(#.*)?\r?\n?)$", + lines[index], + ) + if not match: + return + existing = [part.strip() for part 
in match.group(2).split(",") if part.strip()] + lines[index] = f"{match.group(1)}{', '.join([*existing, *names])}{match.group(3)}" + + +def _detect_newline(text: str) -> str: + return "\r\n" if "\r\n" in text else "\n" + + +def _fallback(log: str) -> CodegenResult: + return CodegenResult(passed=False, log=log, fallback_to_llm=True) diff --git a/simplicio/scratch/codegen/python_pytest.py b/simplicio/scratch/codegen/python_pytest.py new file mode 100644 index 0000000..0aa69a4 --- /dev/null +++ b/simplicio/scratch/codegen/python_pytest.py @@ -0,0 +1,405 @@ +"""Deterministic pytest test generation for scratch tasks.""" + +from __future__ import annotations + +import ast +import re +from dataclasses import dataclass +from pathlib import Path + +from ..plan_schema import Task +from ..stack_registry import Stack +from .python_cst import LibCSTUnavailable, format_module +from .types import CodegenResult, TaskExecutor + + +@dataclass(frozen=True) +class _FunctionTarget: + name: str + source_path: Path + module: str + node: ast.FunctionDef | ast.AsyncFunctionDef + + +class PythonAddPytestTestExecutor(TaskExecutor): + """Generate one minimal pytest happy-path test for a Python function.""" + + name = "python-add-pytest-test" + + def can_handle(self, task: Task, stack: Stack) -> bool: + if not _is_python_stack(stack): + return False + target = task.target.replace("\\", "/").lower() + if not target.endswith(".py"): + return False + if "tests/" not in f"{target}/" and "/tests/" not in f"/{target}": + return False + text = _task_text(task).lower() + return ( + "pytest" in text or "test" in text or Path(target).name.startswith("test_") + ) + + def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult: + target = project_dir / task.target + if target.exists() and not target.is_file(): + return _fallback(f"target is not a file: {task.target}") + + function_target = _resolve_function_target(task, project_dir) + if function_target is None: + return 
_fallback("could not resolve a unique Python function to test") + + call_args = _call_args(function_target.node) + if call_args is None: + return _fallback( + f"could not synthesize happy-path arguments for {function_target.name}" + ) + + original = "" + if target.exists(): + original = target.read_text(encoding="utf-8") + try: + tree = ast.parse(original) + except SyntaxError as exc: + return _fallback(f"target test file is not valid Python: {exc.msg}") + test_name = _test_name(function_target.name) + if _module_has_function(tree, test_name): + return CodegenResult( + passed=True, + files_modified=[], + log=f"{test_name} already exists; no changes needed", + ) + + rendered = _render_test(function_target, call_args) + try: + updated = format_module(_append_test(original, rendered)) + except LibCSTUnavailable as exc: + return _fallback(str(exc)) + try: + ast.parse(updated) + except SyntaxError as exc: + return _fallback(f"generated pytest is not valid Python: {exc.msg}") + + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(updated, encoding="utf-8") + return CodegenResult( + passed=True, + files_modified=[target], + log=( + f"generated pytest with libcst {target.relative_to(project_dir)} " + f"for {function_target.module}.{function_target.name}" + ), + ) + + +def _is_python_stack(stack: Stack) -> bool: + text = f"{stack.slug} {stack.language}".lower() + return "python" in text or stack.slug.startswith("py-") + + +def _task_text(task: Task) -> str: + return "\n".join([task.goal, task.criteria, task.constraints]) + + +def _resolve_function_target(task: Task, project_dir: Path) -> _FunctionTarget | None: + text = _task_text(task) + source_paths = _source_paths_from_text(text, project_dir) + function_name = _function_name_from_text(text) + + if function_name is None and len(source_paths) == 1: + functions = _top_level_functions(source_paths[0]) + if len(functions) == 1: + function_name = functions[0].name + + if function_name is None: + return None + 
+ if not source_paths: + source_paths = _find_source_paths_defining(project_dir, function_name) + if len(source_paths) != 1: + return None + + source_path = source_paths[0] + function = _find_function(source_path, function_name) + if function is None: + return None + + module = _module_name(source_path, project_dir) + if module is None: + return None + + return _FunctionTarget( + name=function_name, + source_path=source_path, + module=module, + node=function, + ) + + +def _source_paths_from_text(text: str, project_dir: Path) -> list[Path]: + paths: list[Path] = [] + for match in re.finditer( + r"(?P(?:[A-Za-z0-9_.-]+[\\/])+[A-Za-z0-9_.-]+\.py)", text + ): + raw = match.group("path").strip("`'\"()[],;:") + normalized = raw.replace("\\", "/") + if normalized.startswith("tests/") or "/tests/" in f"/{normalized}": + continue + path = project_dir / normalized + if path.is_file() and path not in paths: + paths.append(path) + return paths + + +def _function_name_from_text(text: str) -> str | None: + patterns = [ + r"\bfunction\s+`?([A-Za-z_][A-Za-z0-9_]*)`?\s*(?:\(\s*\))?", + r"\btarget\s+`?([A-Za-z_][A-Za-z0-9_]*)`?\s*(?:\(\s*\))?", + r"\bfor\s+`?([A-Za-z_][A-Za-z0-9_]*)`?\s*\(", + r"\btest\s+`?([A-Za-z_][A-Za-z0-9_]*)`?\s*(?:\(\s*\))?", + r"`([A-Za-z_][A-Za-z0-9_]*)`\s+function\b", + ] + for pattern in patterns: + match = re.search(pattern, text, re.IGNORECASE) + if match: + return match.group(1) + return None + + +def _find_source_paths_defining(project_dir: Path, function_name: str) -> list[Path]: + roots = [project_dir / "src"] if (project_dir / "src").is_dir() else [project_dir] + paths: list[Path] = [] + for root in roots: + for path in root.rglob("*.py"): + normalized = path.relative_to(project_dir).as_posix() + if normalized.startswith("tests/") or "/tests/" in f"/{normalized}": + continue + if _find_function(path, function_name) is not None: + paths.append(path) + return paths + + +def _top_level_functions(path: Path) -> list[ast.FunctionDef | 
ast.AsyncFunctionDef]: + try: + tree = ast.parse(path.read_text(encoding="utf-8")) + except (OSError, SyntaxError): + return [] + return [ + node + for node in tree.body + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) + ] + + +def _find_function( + path: Path, function_name: str +) -> ast.FunctionDef | ast.AsyncFunctionDef | None: + for node in _top_level_functions(path): + if node.name == function_name: + return node + return None + + +def _module_name(source_path: Path, project_dir: Path) -> str | None: + roots = _import_roots(project_dir) + resolved_source = source_path.resolve() + for root in sorted(roots, key=lambda item: len(item.parts), reverse=True): + try: + relative = resolved_source.relative_to(root.resolve()) + except ValueError: + continue + module_path = relative.with_suffix("") + parts = list(module_path.parts) + if parts and parts[-1] == "__init__": + parts.pop() + if parts: + return ".".join(parts) + return None + + +def _import_roots(project_dir: Path) -> list[Path]: + roots: list[Path] = [] + for entry in _pytest_pythonpath(project_dir): + path = project_dir / entry + if path.exists() and path not in roots: + roots.append(path) + src = project_dir / "src" + if src.exists() and src not in roots: + roots.append(src) + if project_dir not in roots: + roots.append(project_dir) + return roots + + +def _pytest_pythonpath(project_dir: Path) -> list[str]: + pyproject = project_dir / "pyproject.toml" + if not pyproject.is_file(): + return [] + text = pyproject.read_text(encoding="utf-8") + match = re.search(r"(?m)^\s*pythonpath\s*=\s*(.+)$", text) + if not match: + return [] + raw = match.group(1).strip() + try: + value = ast.literal_eval(raw) + except (SyntaxError, ValueError): + return [] + if isinstance(value, str): + return [value] + if isinstance(value, list): + return [item for item in value if isinstance(item, str)] + return [] + + +def _call_args(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str | None: + args = node.args + 
rendered: list[str] = [] + positional = list(args.posonlyargs) + list(args.args) + required_positional = len(positional) - len(args.defaults) + for arg in positional[:required_positional]: + literal = _literal_for_arg(arg) + if literal is None: + return None + rendered.append(literal) + + for arg, default in zip(args.kwonlyargs, args.kw_defaults): + if default is not None: + continue + literal = _literal_for_arg(arg) + if literal is None: + return None + rendered.append(f"{arg.arg}={literal}") + + return ", ".join(rendered) + + +def _literal_for_arg(arg: ast.arg) -> str | None: + annotation = _annotation_text(arg.annotation) + lowered = annotation.lower().replace("typing.", "") if annotation else "" + name = arg.arg.lower() + + if "str" in lowered: + return '"sample"' + if "int" in lowered: + return "1" + if "float" in lowered: + return "1.0" + if "bool" in lowered: + return "True" + if ( + "list" in lowered + or lowered.startswith("sequence") + or lowered.startswith("iterable") + ): + return "[]" + if "dict" in lowered or lowered.startswith("mapping"): + return "{}" + if "set" in lowered: + return "set()" + if "tuple" in lowered: + return "()" + + if any( + token in name for token in ("text", "name", "email", "slug", "title", "query") + ): + return '"sample"' + if any( + token in name + for token in ("count", "index", "number", "size", "limit", "offset") + ): + return "1" + if name.endswith("_id") or name == "id": + return "1" + if any(token in name for token in ("items", "rows", "values")): + return "[]" + if any(token in name for token in ("data", "payload", "record")): + return "{}" + if name.startswith("is_") or name in {"enabled", "active"}: + return "True" + + return None + + +def _annotation_text(annotation: ast.expr | None) -> str: + if annotation is None: + return "" + try: + return ast.unparse(annotation) + except AttributeError: + if isinstance(annotation, ast.Name): + return annotation.id + return "" + + +def _render_test(function_target: 
# Return types for which an ``isinstance`` assertion is generated.
_ASSERTABLE_RETURN_TYPES = frozenset({"bool", "str", "int", "float", "list", "dict"})

# Wrappers with no runtime type of their own.
_RETURN_WRAPPERS = frozenset({"optional", "union", "annotated"})


def _split_annotation(text: str) -> list[str]:
    """Split *text* on ``,`` and ``|`` that are not nested inside brackets."""
    members: list[str] = []
    current: list[str] = []
    depth = 0
    for char in text:
        if char in "[(":
            depth += 1
        elif char in "])":
            depth -= 1
        if depth == 0 and char in ",|":
            members.append("".join(current).strip())
            current = []
        else:
            current.append(char)
    members.append("".join(current).strip())
    return members


def _return_type_head(annotation: str) -> tuple[str, bool]:
    """Reduce a return annotation to ``(outermost type name, nullable)``.

    ``list[str]`` gives ``("list", False)``; ``Optional[str]`` and
    ``str | None`` give ``("str", True)``.  The name is ``""`` when the
    annotation is empty or is a union of several concrete types, because no
    single ``isinstance`` check would be valid for it.
    """
    text = annotation.lower().replace("typing.", "").strip().strip("'\"")
    nullable = False
    for _ in range(8):  # bounded peel of nested wrappers
        members = _split_annotation(text)
        if "none" in members:
            nullable = True
        concrete = [item for item in members if item and item != "none"]
        if len(concrete) != 1:
            return "", nullable
        head, bracket, rest = concrete[0].partition("[")
        head = head.strip().rpartition(".")[2]
        if not (bracket and rest.endswith("]") and head in _RETURN_WRAPPERS):
            return head, nullable
        inner = rest[:-1]
        if head == "optional":
            nullable = True
        if head == "annotated":
            # Only the first member of Annotated[...] is a type.
            inner = _split_annotation(inner)[0]
        text = inner
    return "", nullable


def _assertion(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
    """Return the assertion statement to place in the generated test.

    The return annotation is matched on its outermost type name, so
    ``-> list[str]`` asserts ``list`` (not ``str``) and a name that merely
    contains ``int`` or ``str`` is not asserted as that builtin.  An optional
    return additionally accepts ``None``.  Without a usable annotation the
    function body decides between ``is None`` and ``is not None``.

    Args:
        node: The function definition the test is generated for.

    Returns:
        One ``assert ...`` statement as source text, referring to ``result``.
    """
    annotation = _annotation_text(node.returns).lower().replace("typing.", "")
    annotation = annotation.strip().strip("'\"")
    # ``NoReturn`` unparses without a space, so the lowered form is "noreturn".
    if annotation in {"none", "noreturn"}:
        return "assert result is None"

    head, nullable = _return_type_head(annotation)
    if head in _ASSERTABLE_RETURN_TYPES:
        check = f"isinstance(result, {head})"
        if nullable:
            return f"assert result is None or {check}"
        return f"assert {check}"

    if not _has_value_return(node):
        return "assert result is None"
    return "assert result is not None"


def _has_value_return(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
    """Tell whether calling *node* yields something other than ``None``.

    True when the function's own body contains ``return <value>`` (other than
    a literal ``None``) or a ``yield``, since calling a generator function
    returns a generator object.  Nested functions, lambdas and classes are
    separate scopes and are not searched.
    """
    pending: list[ast.AST] = list(node.body)
    while pending:
        child = pending.pop()
        if isinstance(
            child, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda, ast.ClassDef)
        ):
            continue
        if isinstance(child, (ast.Yield, ast.YieldFrom)):
            return True
        if isinstance(child, ast.Return) and child.value is not None:
            returns_none = (
                isinstance(child.value, ast.Constant) and child.value.value is None
            )
            if not returns_none:
                return True
        pending.extend(ast.iter_child_nodes(child))
    return False
ast.AsyncFunctionDef)) + and node.name == function_name + for node in getattr(tree, "body", []) + ) + + +def _fallback(log: str) -> CodegenResult: + return CodegenResult(passed=False, log=log, fallback_to_llm=True) diff --git a/simplicio/scratch/codegen/registry.py b/simplicio/scratch/codegen/registry.py index a6e5296..803d194 100644 --- a/simplicio/scratch/codegen/registry.py +++ b/simplicio/scratch/codegen/registry.py @@ -7,9 +7,20 @@ from ..plan_schema import Task from ..stack_registry import Stack +from .python_fastapi import PythonAddFastApiRouteExecutor +from .python_orm import PythonAddOrmFieldExecutor +from .python_pydantic import PythonAddPydanticSchemaExecutor +from .python_pytest import PythonAddPytestTestExecutor +from .typescript_next_route import TypeScriptAddNextRouteExecutor from .types import CodegenResult, TaskExecutor -_DEFAULT_EXECUTORS: list[TaskExecutor] = [] +_DEFAULT_EXECUTORS: list[TaskExecutor] = [ + PythonAddOrmFieldExecutor(), + PythonAddPydanticSchemaExecutor(), + PythonAddFastApiRouteExecutor(), + PythonAddPytestTestExecutor(), + TypeScriptAddNextRouteExecutor(), +] def registered_executors() -> list[TaskExecutor]: @@ -28,5 +39,7 @@ def try_execute( ) -> CodegenResult | None: for executor in executors if executors is not None else _DEFAULT_EXECUTORS: if executor.can_handle(task, stack): - return executor.execute(task, project_dir, stack) + result = executor.execute(task, project_dir, stack) + result.executor_name = executor.name + return result return None diff --git a/simplicio/scratch/codegen/types.py b/simplicio/scratch/codegen/types.py index ae6003b..ccd9007 100644 --- a/simplicio/scratch/codegen/types.py +++ b/simplicio/scratch/codegen/types.py @@ -16,6 +16,7 @@ class CodegenResult: files_modified: list[Path] = field(default_factory=list) log: str = "" fallback_to_llm: bool = False + executor_name: str | None = None class TaskExecutor(ABC): diff --git a/simplicio/scratch/codegen/typescript_next_route.py 
b/simplicio/scratch/codegen/typescript_next_route.py new file mode 100644 index 0000000..8191017 --- /dev/null +++ b/simplicio/scratch/codegen/typescript_next_route.py @@ -0,0 +1,334 @@ +"""Deterministic Next.js route handler generation for scratch tasks.""" + +from __future__ import annotations + +import json +import os +import re +import shutil +import subprocess +import tempfile +from dataclasses import dataclass +from pathlib import Path + +from ..plan_schema import Task +from ..stack_registry import Stack +from .types import CodegenResult, TaskExecutor + + +_SUPPORTED_METHODS = ("GET", "POST", "PUT", "PATCH", "DELETE") + + +@dataclass(frozen=True) +class _NextRouteSpec: + resource: str + variable_name: str + methods: tuple[str, ...] + + +class TypeScriptAddNextRouteExecutor(TaskExecutor): + """Create small JSON route handlers for Next.js app-router API routes.""" + + name = "typescript-add-next-route" + + def can_handle(self, task: Task, stack: Stack) -> bool: + if not _is_next_stack(stack): + return False + if _route_parts(task.target) is None: + return False + text = _task_text(task).lower() + return any( + token in text + for token in ("api", "crud", "endpoint", "json", "route", "handler") + ) + + def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult: + spec = _parse_route_spec(task) + if spec is None: + return _fallback("unsupported Next.js route task shape") + + target = project_dir / task.target + if target.exists() and not target.is_file(): + return _fallback(f"target is not a file: {task.target}") + + original = target.read_text(encoding="utf-8") if target.exists() else "" + missing = [ + method + for method in spec.methods + if not _has_exported_method(original, method) + ] + if target.exists() and not missing: + return CodegenResult( + passed=True, + files_modified=[], + log=f"{task.target} already has {', '.join(spec.methods)} handlers", + ) + + target.parent.mkdir(parents=True, exist_ok=True) + methods = list(missing or 
def _route_parts(target: str) -> list[str] | None:
    """Return the path segments between ``src/app/api`` and ``route.ts``.

    The target must be a relative path of the form
    ``src/app/api/<segment>/.../route.ts`` with at least one segment.
    Backslashes are treated as separators.  Absolute targets and targets
    containing a ``..`` segment are rejected, because the caller joins the
    target onto the project directory and such paths would resolve outside it.

    Args:
        target: The task's target path.

    Returns:
        The intermediate segments (for example ``["users", "[id]"]``), or
        ``None`` when the target is not a supported route file path.
    """
    normalized = target.replace("\\", "/")
    if normalized.startswith("/"):
        # ``project_dir / "/abs"`` discards project_dir entirely.
        return None
    parts = [part for part in normalized.split("/") if part]
    if len(parts) < 5:
        return None
    if parts[:3] != ["src", "app", "api"] or parts[-1] != "route.ts":
        return None
    segments = parts[3:-1]
    if ".." in segments:
        return None
    return segments
def _has_exported_method(text: str, method: str) -> bool:
    """Tell whether *text* already exports a handler named *method*.

    Both declaration styles are recognised: ``export [async] function GET``
    and ``export const|let|var GET``.  Without the second form a handler
    written as ``export const GET = ...`` would be reported as missing and a
    second ``GET`` declaration would be generated into the same file.

    Args:
        text: Source text of the route file (may be empty).
        method: Handler name to look for, e.g. ``"GET"``.

    Returns:
        ``True`` if a matching export declaration is found.
    """
    name = re.escape(method)
    pattern = (
        rf"\bexport\s+(?:(?:async\s+)?function\s+|(?:const|let|var)\s+){name}\b"
    )
    return re.search(pattern, text) is not None
def _ensure_ts_morph_cache() -> tuple[bool, Path | str]:
    """Make sure a ``node_modules`` directory containing ts-morph exists.

    The cache directory is taken from ``SIMPLICIO_TS_MORPH_CACHE`` and
    defaults to ``simplicio-ts-morph-node`` under the system temp directory.
    If ts-morph is not installed there yet, it is installed with ``npm``.

    Returns:
        ``(True, node_modules_path)`` when ts-morph is available, otherwise
        ``(False, message)`` describing why it could not be provided.  Failures
        to create the directory or to run ``npm`` (including a timeout) are
        reported this way rather than raised, so the caller can fall back.
    """
    cache = Path(
        os.environ.get(
            "SIMPLICIO_TS_MORPH_CACHE",
            str(Path(tempfile.gettempdir()) / "simplicio-ts-morph-node"),
        )
    )
    node_modules = cache / "node_modules"
    if (node_modules / "ts-morph" / "package.json").is_file():
        return True, node_modules
    npm = shutil.which("npm") or shutil.which("npm.cmd")
    if npm is None:
        return False, "ts-morph execution failed: npm was not found"
    try:
        cache.mkdir(parents=True, exist_ok=True)
        proc = subprocess.run(
            [
                npm,
                "install",
                "--prefix",
                str(cache),
                "--no-save",
                "--silent",
                "ts-morph@^28.0.0",
                "typescript@^5",
            ],
            capture_output=True,
            text=True,
            timeout=120,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # Same contract as a non-zero exit: report, do not crash the executor.
        return False, f"ts-morph dependency install failed: {exc}"
    if proc.returncode != 0:
        log = ((proc.stdout or "") + (proc.stderr or "")).strip()
        return False, f"ts-morph dependency install failed: {log[-1000:]}"
    return True, node_modules
201 : 200; +} + +function statementsFor(method) { + if (method === "GET") { + return [ + `const ${input.variableName}: Array> = [];`, + `return Response.json(${input.variableName});`, + ]; + } + if (method === "DELETE") { + return ["return Response.json({ deleted: true });"]; + } + return [ + "const body = (await request.json()) as Record;", + `return Response.json(body, { status: ${successStatus(method)} });`, + ]; +} + +for (const method of input.methods) { + if (sourceFile.getFunction(method)) continue; + sourceFile.addFunction({ + isExported: true, + isAsync: true, + name: method, + returnType: "Promise", + parameters: method === "GET" || method === "DELETE" + ? [] + : [{ name: "request", type: "Request" }], + statements: statementsFor(method), + }); +} + +sourceFile.formatText(); +sourceFile.saveSync(); +""" + + +def _fallback(log: str) -> CodegenResult: + return CodegenResult(passed=False, log=log, fallback_to_llm=True) diff --git a/simplicio/scratch/executor.py b/simplicio/scratch/executor.py index 0dbf764..51e6e1c 100644 --- a/simplicio/scratch/executor.py +++ b/simplicio/scratch/executor.py @@ -12,6 +12,7 @@ the pipeline today expects a single goal/target/contract and we need a small adapter shim. Adapter lives in simplicio/scratch/_pipeline_adapter.py. 
""" + from __future__ import annotations import json @@ -32,6 +33,9 @@ class TaskResult: id: str target: str passed: bool + execution_mode: str = "unknown" + codegen_executor: Optional[str] = None + files_modified: list[str] = field(default_factory=list) skipped_reason: Optional[str] = None duration_ms: int = 0 log_tail: str = "" @@ -55,6 +59,26 @@ def tasks_passed(self) -> int: def tasks_total(self) -> int: return len(self.task_results) + @property + def metrics(self) -> dict: + by_mode = { + mode: [task for task in self.task_results if task.execution_mode == mode] + for mode in ("codegen", "llm", "skipped", "failed") + } + total = self.tasks_total + codegen = len(by_mode["codegen"]) + return { + "tasks_total": total, + "tasks_codegen": codegen, + "tasks_llm": len(by_mode["llm"]), + "tasks_skipped": len(by_mode["skipped"]), + "tasks_failed": len(by_mode["failed"]), + "codegen_share": round(codegen / total, 4) if total else 0.0, + "avg_codegen_ms": _avg_ms(by_mode["codegen"]), + "avg_llm_ms": _avg_ms(by_mode["llm"]), + "avg_task_ms": _avg_ms(self.task_results), + } + def to_dict(self) -> dict: return { "project_dir": str(self.project_dir), @@ -63,13 +87,22 @@ def to_dict(self) -> dict: "install_ok": self.install_ok, "install_log_tail": self.install_log[-1500:], "tasks": [ - {"id": t.id, "target": t.target, "passed": t.passed, - "skipped": t.skipped_reason, "duration_ms": t.duration_ms, - "log_tail": t.log_tail[-400:]} + { + "id": t.id, + "target": t.target, + "passed": t.passed, + "execution_mode": t.execution_mode, + "codegen_executor": t.codegen_executor, + "files_modified": t.files_modified, + "skipped": t.skipped_reason, + "duration_ms": t.duration_ms, + "log_tail": t.log_tail[-400:], + } for t in self.task_results ], "tasks_passed": self.tasks_passed, "tasks_total": self.tasks_total, + "metrics": self.metrics, "elapsed_s": round(self.elapsed_s, 2), } @@ -102,13 +135,16 @@ def visit(t: Task) -> None: return ordered -def _safe_run(cmd: list[str] | str, cwd: 
Path, - timeout: int = 300) -> tuple[bool, str]: +def _safe_run(cmd: list[str] | str, cwd: Path, timeout: int = 300) -> tuple[bool, str]: """Run a shell command, never raising. Returns (ok, log_tail).""" try: p = subprocess.run( - cmd, cwd=cwd, capture_output=True, text=True, - timeout=timeout, shell=isinstance(cmd, str), + cmd, + cwd=cwd, + capture_output=True, + text=True, + timeout=timeout, + shell=isinstance(cmd, str), ) except subprocess.TimeoutExpired: return False, f"TIMEOUT after {timeout}s" @@ -118,8 +154,7 @@ def _safe_run(cmd: list[str] | str, cwd: Path, return p.returncode == 0, log -def _execute_one_task(task: Task, project_dir: Path, - stack: Stack) -> TaskResult: +def _execute_one_task(task: Task, project_dir: Path, stack: Stack) -> TaskResult: """Execute a single task. For Phase 0/1, this stubs out the actual code generation if no SIMPLICIO_MODEL is set so the scaffold + verify pipeline can still be smoke-tested. When SIMPLICIO_MODEL IS set, defers to @@ -137,11 +172,14 @@ def _execute_one_task(task: Task, project_dir: Path, # smoke-test mode: log the task but mark as skipped (no LLM call made) ms = int((time.perf_counter() - t0) * 1000) fallback_note = ( - f"codegen fallback: {codegen_log[:200]}\n" - if codegen_log else "" + f"codegen fallback: {codegen_log[:200]}\n" if codegen_log else "" ) return TaskResult( - id=task.id, target=task.target, passed=False, duration_ms=ms, + id=task.id, + target=task.target, + passed=False, + duration_ms=ms, + execution_mode="skipped", skipped_reason="no SIMPLICIO_MODEL set; task generation skipped", log_tail=f"{fallback_note}goal={task.goal[:200]}", ) @@ -151,7 +189,11 @@ def _execute_one_task(task: Task, project_dir: Path, except ImportError as e: ms = int((time.perf_counter() - t0) * 1000) return TaskResult( - id=task.id, target=task.target, passed=False, duration_ms=ms, + id=task.id, + target=task.target, + passed=False, + duration_ms=ms, + execution_mode="failed", skipped_reason=f"adapter import failed: {e}", 
) @@ -159,8 +201,14 @@ def _execute_one_task(task: Task, project_dir: Path, if codegen_log: log = f"codegen fallback: {codegen_log}\n\n{log}" ms = int((time.perf_counter() - t0) * 1000) - return TaskResult(id=task.id, target=task.target, passed=passed, - duration_ms=ms, log_tail=log) + return TaskResult( + id=task.id, + target=task.target, + passed=passed, + execution_mode="llm" if passed else "failed", + duration_ms=ms, + log_tail=log, + ) def _task_result_from_codegen( @@ -173,13 +221,23 @@ def _task_result_from_codegen( id=task.id, target=task.target, passed=result.passed, + execution_mode="codegen" if result.passed else "failed", + codegen_executor=result.executor_name, + files_modified=[str(path) for path in result.files_modified], duration_ms=ms, log_tail=f"{result.log}{suffix}".strip(), ) -def execute_plan(plan: Plan, stack: Stack, parent_dir: Path, - skip_install: bool = False) -> ExecutorReport: +def _avg_ms(tasks: list[TaskResult]) -> int: + if not tasks: + return 0 + return round(sum(task.duration_ms for task in tasks) / len(tasks)) + + +def execute_plan( + plan: Plan, stack: Stack, parent_dir: Path, skip_install: bool = False +) -> ExecutorReport: """Materialize the plan into parent_dir//.""" t_start = time.perf_counter() @@ -187,7 +245,8 @@ def execute_plan(plan: Plan, stack: Stack, parent_dir: Path, if project_dir.exists(): raise FileExistsError( f"project directory already exists: {project_dir}. " - "Choose a different project_name or remove the existing dir.") + "Choose a different project_name or remove the existing dir." 
+ ) project_dir.mkdir(parents=True) report = ExecutorReport(project_dir=project_dir, stack_slug=stack.slug) @@ -204,28 +263,44 @@ def execute_plan(plan: Plan, stack: Stack, parent_dir: Path, sim_dir = project_dir / ".simplicio" sim_dir.mkdir(exist_ok=True) plan_path = sim_dir / "plan.json" - plan_path.write_text(json.dumps({ - "version": plan.version, - "stack": plan.stack, - "project_name": plan.project_name, - "rationale": plan.rationale, - "files_to_create": [{"path": f.path, "purpose": f.purpose} - for f in plan.files_to_create], - "tasks": [{"id": t.id, "goal": t.goal, "target": t.target, - "criteria": t.criteria, "constraints": t.constraints, - "verify": t.verify, "depends_on": t.depends_on} - for t in plan.tasks], - "deps_to_install": plan.deps_to_install, - "deps_dev": plan.deps_dev, - "test_command": plan.test_command, - "lint_command": plan.lint_command, - "estimated_total_tasks": plan.estimated_total_tasks, - }, indent=2), encoding="utf-8") + plan_path.write_text( + json.dumps( + { + "version": plan.version, + "stack": plan.stack, + "project_name": plan.project_name, + "rationale": plan.rationale, + "files_to_create": [ + {"path": f.path, "purpose": f.purpose} for f in plan.files_to_create + ], + "tasks": [ + { + "id": t.id, + "goal": t.goal, + "target": t.target, + "criteria": t.criteria, + "constraints": t.constraints, + "verify": t.verify, + "depends_on": t.depends_on, + } + for t in plan.tasks + ], + "deps_to_install": plan.deps_to_install, + "deps_dev": plan.deps_dev, + "test_command": plan.test_command, + "lint_command": plan.lint_command, + "estimated_total_tasks": plan.estimated_total_tasks, + }, + indent=2, + ), + encoding="utf-8", + ) # 3. Run install (best-effort) if not skip_install and stack.install_command: report.install_ok, report.install_log = _safe_run( - stack.install_command, project_dir, timeout=600) + stack.install_command, project_dir, timeout=600 + ) # 4. 
Execute tasks in dependency order for task in _topo_sort(plan.tasks): @@ -235,6 +310,7 @@ def execute_plan(plan: Plan, stack: Stack, parent_dir: Path, # 5. Write final report next to the plan (sim_dir / "scratch_report.json").write_text( - json.dumps(report.to_dict(), indent=2), encoding="utf-8") + json.dumps(report.to_dict(), indent=2), encoding="utf-8" + ) return report diff --git a/simplicio/scratch/plan_schema.py b/simplicio/scratch/plan_schema.py index c742e5c..5a0cb9e 100644 --- a/simplicio/scratch/plan_schema.py +++ b/simplicio/scratch/plan_schema.py @@ -9,11 +9,12 @@ The structure is simple enough that a manual validator gives clearer errors than jsonschema-default output. """ + from __future__ import annotations import re from dataclasses import dataclass, field -from typing import Any, Optional +from typing import Any SCHEMA_VERSION = "1.0" @@ -54,6 +55,7 @@ class Plan: class PlanValidationError(ValueError): """Raised when the planner output is off-schema. Carries the human-readable diff so the planner can be re-prompted with the exact violation list.""" + def __init__(self, errors: list[str]) -> None: self.errors = errors super().__init__("plan validation failed:\n - " + "\n - ".join(errors)) @@ -103,7 +105,8 @@ def validate_plan(raw: dict) -> Plan: project_name = _need(raw, "project_name", str, errors, "") or "" if project_name and not _PROJECT_NAME_RE.match(project_name): errors.append( - f"project_name '{project_name}' must match {_PROJECT_NAME_RE.pattern}") + f"project_name '{project_name}' must match {_PROJECT_NAME_RE.pattern}" + ) rationale = _need(raw, "rationale", str, errors, "") or "" @@ -129,8 +132,7 @@ def validate_plan(raw: dict) -> Plan: tid = _need(tr, "id", str, errors, path) or "" if tid: if not _TASK_ID_RE.match(tid): - errors.append( - f"{path}.id '{tid}' must match {_TASK_ID_RE.pattern}") + errors.append(f"{path}.id '{tid}' must match {_TASK_ID_RE.pattern}") elif tid in seen_ids: errors.append(f"{path}.id '{tid}' is duplicated") else: 
@@ -143,20 +145,28 @@ def validate_plan(raw: dict) -> Plan: d = tr.get("depends_on", []) deps = _list_of_str(d, errors, f"{path}.depends_on") if g and t and c and co and v: - tasks.append(Task(id=tid, goal=g, target=t, criteria=c, - constraints=co, verify=v, depends_on=deps)) + tasks.append( + Task( + id=tid, + goal=g, + target=t, + criteria=c, + constraints=co, + verify=v, + depends_on=deps, + ) + ) # Cross-task validation: depends_on must reference existing IDs for t in tasks: for dep in t.depends_on: if dep not in seen_ids: - errors.append( - f"tasks[{t.id}].depends_on references unknown id '{dep}'") + errors.append(f"tasks[{t.id}].depends_on references unknown id '{dep}'") - deps_to_install = _list_of_str(raw.get("deps_to_install", []), - errors, "deps_to_install") - deps_dev = _list_of_str(raw.get("deps_dev", []), - errors, "deps_dev") + deps_to_install = _list_of_str( + raw.get("deps_to_install", []), errors, "deps_to_install" + ) + deps_dev = _list_of_str(raw.get("deps_dev", []), errors, "deps_dev") test_command = _need(raw, "test_command", str, errors, "") or "" lint_command = _need(raw, "lint_command", str, errors, "") or "" estimated = _need(raw, "estimated_total_tasks", int, errors, "") @@ -164,7 +174,8 @@ def validate_plan(raw: dict) -> Plan: estimated = 0 elif estimated != len(tasks): errors.append( - f"estimated_total_tasks={estimated} but tasks has {len(tasks)} entries") + f"estimated_total_tasks={estimated} but tasks has {len(tasks)} entries" + ) if errors: raise PlanValidationError(errors) @@ -190,12 +201,10 @@ def validate_plan(raw: dict) -> Plan: "stack": "py-fastapi", "project_name": "condo-mgmt", "rationale": "FastAPI is the lightest Python web stack with type hints; " - "fits a CRUD admin app with low operational overhead.", + "fits a CRUD admin app with low operational overhead.", "files_to_create": [ - {"path": "src/api/units.py", - "purpose": "REST endpoints for the Unit entity"}, - {"path": "src/db/models.py", - "purpose": "SQLAlchemy ORM 
model for Unit"}, + {"path": "src/api/units.py", "purpose": "REST endpoints for the Unit entity"}, + {"path": "src/db/models.py", "purpose": "SQLAlchemy ORM model for Unit"}, ], "tasks": [ { diff --git a/simplicio/scratch/skill_opt.py b/simplicio/scratch/skill_opt.py index adc6c9b..d183de6 100644 --- a/simplicio/scratch/skill_opt.py +++ b/simplicio/scratch/skill_opt.py @@ -5,6 +5,7 @@ `.skills/`. Always writes the generated skill with `review_required: true` in the frontmatter, so a human gate-keeps before it becomes a default. """ + from __future__ import annotations import argparse @@ -116,13 +117,17 @@ def _has_review_gate(text: str) -> bool: return bool(re.search(r"review_required:\s*true", text)) -def generate_skill_doc(description: str, skills_root: Optional[Path] = None, - planner_model: Optional[str] = None) -> tuple[str, str]: +def generate_skill_doc( + description: str, + skills_root: Optional[Path] = None, + planner_model: Optional[str] = None, +) -> tuple[str, str]: """Generate the SKILL.md content. 
Returns (slug, full markdown).""" root = skills_root or _skills_root() existing = _list_existing_skills(root) - pm = planner_model or os.environ.get("SIMPLICIO_PLANNER", - "deepseek/deepseek-v4-pro") + pm = planner_model or os.environ.get( + "SIMPLICIO_PLANNER", "deepseek/deepseek-v4-pro" + ) prompt = SKILL_GEN_TEMPLATE.format( system=SKILL_GEN_SYSTEM, @@ -139,21 +144,23 @@ def generate_skill_doc(description: str, skills_root: Optional[Path] = None, slug = _extract_slug(text) if not slug: raise SkillOptError( - "generated SKILL.md is missing a valid `name:` frontmatter field") + "generated SKILL.md is missing a valid `name:` frontmatter field" + ) if not _has_review_gate(text): raise SkillOptError( "generated SKILL.md is missing `review_required: true` gate — " - "rejected to protect .skills/ from un-reviewed defaults") + "rejected to protect .skills/ from un-reviewed defaults" + ) if slug in existing: raise SkillOptError( f"skill '{slug}' already exists; pick a different angle or " - f"reference the existing one") + f"reference the existing one" + ) return slug, text -def install_skill(slug: str, markdown: str, - skills_root: Optional[Path] = None) -> Path: +def install_skill(slug: str, markdown: str, skills_root: Optional[Path] = None) -> Path: """Write the generated SKILL.md to disk and return its path.""" root = skills_root or _skills_root() root.mkdir(parents=True, exist_ok=True) @@ -168,12 +175,17 @@ def install_skill(slug: str, markdown: str, def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(prog="simplicio skill new") - parser.add_argument("description", - help="what the skill should do (one or two sentences)") - parser.add_argument("--planner", default=None, - help="override SIMPLICIO_PLANNER for this run") - parser.add_argument("--dry-run", action="store_true", - help="print the generated SKILL.md but do not write it") + parser.add_argument( + "description", help="what the skill should do (one or two sentences)" + ) + 
parser.add_argument( + "--planner", default=None, help="override SIMPLICIO_PLANNER for this run" + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="print the generated SKILL.md but do not write it", + ) args = parser.parse_args(argv) if args.planner: @@ -195,6 +207,9 @@ def main(argv: list[str] | None = None) -> int: print(f"[skill-opt] {e}", file=sys.stderr) return 3 print(f"[skill-opt] installed at {path}", file=sys.stderr) - print(f"[skill-opt] frontmatter has review_required: true — review it " - f"before relying on it.", file=sys.stderr) + print( + "[skill-opt] frontmatter has review_required: true — review it " + "before relying on it.", + file=sys.stderr, + ) return 0 diff --git a/simplicio/scratch/stack_registry.py b/simplicio/scratch/stack_registry.py index 115e6a2..3805bf3 100644 --- a/simplicio/scratch/stack_registry.py +++ b/simplicio/scratch/stack_registry.py @@ -8,6 +8,7 @@ tree/ — files literally copied at scaffold time (placeholders {project_name} and {goal} are rendered by executor) """ + from __future__ import annotations import json @@ -17,6 +18,8 @@ from pathlib import Path from typing import Iterator, Optional +_TREE_CACHE_DIRS = {"__pycache__", ".mypy_cache", ".pytest_cache", ".ruff_cache"} + def _stacks_root() -> Path: """Resolve the stacks directory. 
Honors SIMPLICIO_STACKS_DIR for testing.""" @@ -30,6 +33,7 @@ def _stacks_root() -> Path: @dataclass class Stack: """A single stack template loaded from disk.""" + slug: str path: Path meta: dict = field(default_factory=dict) @@ -38,22 +42,28 @@ class Stack: verify: dict = field(default_factory=dict) @property - def language(self) -> str: return self.meta.get("language", "?") + def language(self) -> str: + return self.meta.get("language", "?") @property - def framework(self) -> str: return self.meta.get("framework", "?") + def framework(self) -> str: + return self.meta.get("framework", "?") @property - def version(self) -> str: return self.meta.get("template_version", "0.0.0") + def version(self) -> str: + return self.meta.get("template_version", "0.0.0") @property - def test_command(self) -> str: return self.verify.get("test", "") + def test_command(self) -> str: + return self.verify.get("test", "") @property - def lint_command(self) -> str: return self.verify.get("lint", "") + def lint_command(self) -> str: + return self.verify.get("lint", "") @property - def install_command(self) -> str: return self.verify.get("install", "") + def install_command(self) -> str: + return self.verify.get("install", "") def tree_files(self) -> Iterator[Path]: """Yield every file under tree/ recursively.""" @@ -61,6 +71,8 @@ def tree_files(self) -> Iterator[Path]: if not tree.is_dir(): return for p in tree.rglob("*"): + if _is_ignored_tree_cache(p, tree): + continue if p.is_file(): yield p @@ -72,6 +84,8 @@ def render_tree(self, dest: Path, vars: dict) -> list[Path]: if not tree.is_dir(): return written for src in tree.rglob("*"): + if _is_ignored_tree_cache(src, tree): + continue rel = src.relative_to(tree) out = dest / rel if src.is_dir(): @@ -89,6 +103,11 @@ def render_tree(self, dest: Path, vars: dict) -> list[Path]: return written +def _is_ignored_tree_cache(path: Path, tree: Path) -> bool: + rel = path.relative_to(tree) + return any(part in _TREE_CACHE_DIRS for part in 
rel.parts) + + class StackRegistry: """Lazy registry: scans the stacks dir on first access.""" diff --git a/simplicio/templates/stacks/go-gin/README.md b/simplicio/templates/stacks/go-gin/README.md new file mode 100644 index 0000000..1a452c9 --- /dev/null +++ b/simplicio/templates/stacks/go-gin/README.md @@ -0,0 +1,33 @@ +# go-gin + +Go 1.22 + Gin scaffold for small REST APIs where fast startup, simple +deployment, and a single static binary matter. + +## When to use this stack + +- Backend-only JSON API +- Team wants compile-time checks and low runtime overhead +- Service should ship as a container or single binary +- CRUD or webhook service with straightforward HTTP routing + +## When NOT to use this stack + +- SSR web application - use `ts-nextjs` +- Laravel ecosystem or PHP team conventions - use `php-laravel` +- Heavy async Python ecosystem integrations - use `py-fastapi` + +## Layout produced + +``` +/ ++-- cmd/server/main.go # entrypoint ++-- internal/http/ # Gin router and handlers ++-- go.mod ++-- README.md +``` + +## Verify-loop + +- `install`: `go mod download` +- `test`: `go test ./...` +- `lint`: `go vet ./...` diff --git a/simplicio/templates/stacks/go-gin/practices.md b/simplicio/templates/stacks/go-gin/practices.md new file mode 100644 index 0000000..4450562 --- /dev/null +++ b/simplicio/templates/stacks/go-gin/practices.md @@ -0,0 +1,28 @@ +# go-gin best practices (planner reference) + +## Project structure + +- `cmd/server/main.go` creates the router and starts the HTTP server. +- `internal/http/router.go` owns route registration. +- One file per resource handler under `internal/http/`. +- Keep domain logic out of handlers when it grows beyond a tiny first draft. + +## API design + +- Use `gin.New()` plus explicit middleware instead of hidden globals. +- Return JSON with `c.JSON(status, payload)`. +- Keep handler function names action-oriented: `ListUnits`, `CreateUnit`. 
+- Validate request bodies before mutation and return `400` with an error object. + +## Testing + +- Use `httptest.NewRecorder()` and `http.NewRequest()` for handler tests. +- Keep tests under the same package when they inspect unexported helpers. +- Every route task should add or update a `go test ./...` passing test. + +## Output the planner SHOULD produce for this stack + +- Tasks order: router setup -> resource model -> handlers -> tests. +- Each task touches ONE file. +- `test_command` = `go test ./...` +- `lint_command` = `go vet ./...` diff --git a/simplicio/templates/stacks/go-gin/stack.json b/simplicio/templates/stacks/go-gin/stack.json new file mode 100644 index 0000000..a6478c9 --- /dev/null +++ b/simplicio/templates/stacks/go-gin/stack.json @@ -0,0 +1,13 @@ +{ + "slug": "go-gin", + "template_version": "0.1.0", + "language": "Go 1.22", + "framework": "Gin", + "framework_version": "^1.10", + "package_manager": "go", + "test_runner": "go test", + "linter": "go vet", + "deps_required": ["github.com/gin-gonic/gin@v1.10.0"], + "deps_dev": [], + "tags": ["web", "api", "go", "rest", "compiled"] +} diff --git a/simplicio/templates/stacks/go-gin/tree/README.md b/simplicio/templates/stacks/go-gin/tree/README.md new file mode 100644 index 0000000..acaff84 --- /dev/null +++ b/simplicio/templates/stacks/go-gin/tree/README.md @@ -0,0 +1,10 @@ +# {project_name} + +{goal} + +## Commands + +- `go mod download` +- `go test ./...` +- `go vet ./...` +- `go run ./cmd/server` diff --git a/simplicio/templates/stacks/go-gin/tree/cmd/server/main.go b/simplicio/templates/stacks/go-gin/tree/cmd/server/main.go new file mode 100644 index 0000000..a7e0318 --- /dev/null +++ b/simplicio/templates/stacks/go-gin/tree/cmd/server/main.go @@ -0,0 +1,14 @@ +package main + +import ( + "log" + + apphttp "{project_name}/internal/http" +) + +func main() { + router := apphttp.NewRouter() + if err := router.Run(":8080"); err != nil { + log.Fatal(err) + } +} diff --git 
a/simplicio/templates/stacks/go-gin/tree/go.mod b/simplicio/templates/stacks/go-gin/tree/go.mod new file mode 100644 index 0000000..dbf0d3c --- /dev/null +++ b/simplicio/templates/stacks/go-gin/tree/go.mod @@ -0,0 +1,5 @@ +module {project_name} + +go 1.22 + +require github.com/gin-gonic/gin v1.10.0 diff --git a/simplicio/templates/stacks/go-gin/tree/internal/http/router.go b/simplicio/templates/stacks/go-gin/tree/internal/http/router.go new file mode 100644 index 0000000..dc27b9b --- /dev/null +++ b/simplicio/templates/stacks/go-gin/tree/internal/http/router.go @@ -0,0 +1,18 @@ +package http + +import ( + "net/http" + + "github.com/gin-gonic/gin" +) + +func NewRouter() *gin.Engine { + router := gin.New() + router.Use(gin.Logger(), gin.Recovery()) + router.GET("/health", Health) + return router +} + +func Health(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"status": "ok"}) +} diff --git a/simplicio/templates/stacks/go-gin/tree/internal/http/router_test.go b/simplicio/templates/stacks/go-gin/tree/internal/http/router_test.go new file mode 100644 index 0000000..2c3446a --- /dev/null +++ b/simplicio/templates/stacks/go-gin/tree/internal/http/router_test.go @@ -0,0 +1,22 @@ +package http + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestHealth(t *testing.T) { + router := NewRouter() + response := httptest.NewRecorder() + request, err := http.NewRequest(http.MethodGet, "/health", nil) + if err != nil { + t.Fatal(err) + } + + router.ServeHTTP(response, request) + + if response.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", response.Code) + } +} diff --git a/simplicio/templates/stacks/go-gin/verify.json b/simplicio/templates/stacks/go-gin/verify.json new file mode 100644 index 0000000..343a2e7 --- /dev/null +++ b/simplicio/templates/stacks/go-gin/verify.json @@ -0,0 +1,6 @@ +{ + "install": "go mod download", + "test": "go test ./...", + "test_runner": "go test", + "lint": "go vet ./..." 
+} diff --git a/simplicio/templates/stacks/ts-nextjs/tree/package.json b/simplicio/templates/stacks/ts-nextjs/tree/package.json index 13ddbf9..dbd84f2 100644 --- a/simplicio/templates/stacks/ts-nextjs/tree/package.json +++ b/simplicio/templates/stacks/ts-nextjs/tree/package.json @@ -23,6 +23,7 @@ "@types/react-dom": "^18", "eslint": "^8", "eslint-config-next": "^14.2", + "ts-morph": "^28.0.0", "typescript": "^5", "vitest": "^2" } diff --git a/tests/python/test_scratch.py b/tests/python/test_scratch.py index b439812..79ba367 100644 --- a/tests/python/test_scratch.py +++ b/tests/python/test_scratch.py @@ -1,4 +1,5 @@ """Unit tests for simplicio.scratch — schema, registry, executor stub mode.""" + from __future__ import annotations import os @@ -12,7 +13,7 @@ PlanValidationError, validate_plan, ) -from simplicio.scratch.stack_registry import StackRegistry, slugify_project +from simplicio.scratch.stack_registry import Stack, StackRegistry, slugify_project # ----- plan_schema ----- # @@ -48,9 +49,7 @@ def test_rejects_estimated_count_mismatch() -> None: def test_rejects_unknown_depends_on() -> None: bad = { **EXAMPLE_PLAN, - "tasks": [ - {**EXAMPLE_PLAN["tasks"][0], "depends_on": ["T99-ghost"]} - ], + "tasks": [{**EXAMPLE_PLAN["tasks"][0], "depends_on": ["T99-ghost"]}], } with pytest.raises(PlanValidationError) as exc: validate_plan(bad) @@ -76,6 +75,7 @@ def test_registry_lists_pilot_stacks() -> None: slugs = {s.slug for s in reg.list()} assert "py-fastapi" in slugs assert "ts-nextjs" in slugs + assert "go-gin" in slugs def test_registry_loads_full_metadata() -> None: @@ -94,6 +94,39 @@ def test_registry_filters_by_tag() -> None: web_stacks = {s.slug for s in reg.by_tags(["web"])} assert "py-fastapi" in web_stacks assert "ts-nextjs" in web_stacks + assert "go-gin" in web_stacks + + +def test_registry_loads_go_gin_stack_metadata() -> None: + reg = StackRegistry() + stack = reg.get("go-gin") + assert stack is not None + assert stack.language.startswith("Go") + assert 
stack.framework == "Gin" + assert stack.install_command == "go mod download" + assert stack.test_command == "go test ./..." + assert "best practices" in stack.practices.lower() + + +def test_stack_render_tree_ignores_tool_cache_dirs() -> None: + with tempfile.TemporaryDirectory() as td: + root = Path(td) / "stack" + tree = root / "tree" + (tree / ".ruff_cache" / "0.15.13").mkdir(parents=True) + (tree / ".ruff_cache" / "0.15.13" / "cache").write_bytes(b"\xff\xfe") + (tree / ".gitignore").write_text("*.pyc\n", encoding="utf-8") + (tree / "README.md").write_text("# {project_name}\n", encoding="utf-8") + + dest = Path(td) / "out" + stack = Stack(slug="test", path=root) + written = stack.render_tree(dest, {"project_name": "demo"}) + + assert sorted(p.relative_to(dest).as_posix() for p in written) == [ + ".gitignore", + "README.md", + ] + assert (dest / "README.md").read_text(encoding="utf-8") == "# demo\n" + assert not (dest / ".ruff_cache").exists() def test_slugify_project_normalizes_name() -> None: @@ -129,12 +162,51 @@ def test_executor_scaffolds_tree_in_stub_mode() -> None: # tasks in stub mode are recorded but not passed assert report.tasks_total == 1 assert report.tasks_passed == 0 + assert report.metrics["tasks_skipped"] == 1 + assert report.metrics["codegen_share"] == 0.0 assert report.task_results[0].skipped_reason is not None finally: if prev is not None: os.environ["SIMPLICIO_MODEL"] = prev +def test_executor_report_records_codegen_metrics() -> None: + reg = StackRegistry() + stack = reg.get("ts-nextjs") + assert stack is not None + plan = validate_plan( + { + **EXAMPLE_PLAN, + "stack": "ts-nextjs", + "project_name": "next-api", + "tasks": [ + { + "id": "T01-next-route", + "depends_on": [], + "goal": "Create Next.js route handlers for Unit CRUD", + "target": "src/app/api/units/route.ts", + "criteria": "- exports GET and POST handlers\n- returns JSON", + "constraints": "- use deterministic codegen", + "verify": "pnpm vitest run", + } + ], + 
"estimated_total_tasks": 1, + } + ) + + with tempfile.TemporaryDirectory() as td: + from simplicio.scratch.executor import execute_plan + + report = execute_plan(plan, stack, Path(td), skip_install=True) + data = report.to_dict() + + assert report.metrics["tasks_codegen"] == 1 + assert report.metrics["codegen_share"] == 1.0 + assert data["tasks"][0]["execution_mode"] == "codegen" + assert data["tasks"][0]["codegen_executor"] == "typescript-add-next-route" + assert (report.project_dir / "src/app/api/units/route.ts").is_file() + + def test_executor_refuses_existing_project_dir() -> None: reg = StackRegistry() stack = reg.get("py-fastapi") diff --git a/tests/python/test_scratch_codegen_fastapi.py b/tests/python/test_scratch_codegen_fastapi.py new file mode 100644 index 0000000..122a305 --- /dev/null +++ b/tests/python/test_scratch_codegen_fastapi.py @@ -0,0 +1,101 @@ +"""Tests for deterministic FastAPI route scratch codegen.""" + +from __future__ import annotations + +import ast +from pathlib import Path + +from simplicio.scratch.codegen import PythonAddFastApiRouteExecutor +from simplicio.scratch.codegen import registry as codegen_registry +from simplicio.scratch.plan_schema import Task +from simplicio.scratch.stack_registry import Stack + + +def _stack(tmp_path: Path) -> Stack: + return Stack( + slug="py-fastapi", + path=tmp_path, + meta={"language": "Python", "framework": "FastAPI"}, + ) + + +def _task(goal: str = "Add GET `/users/{id}` endpoint to the users route") -> Task: + return Task( + id="T02-api-route", + goal=goal, + target="src/api/users.py", + criteria="- exposes @router.get with async handler and return type", + constraints="- keep existing imports", + verify="pytest tests/api/test_users.py -q", + ) + + +def _write_route(tmp_path: Path, content: str) -> Path: + path = tmp_path / "src/api/users.py" + path.parent.mkdir(parents=True) + path.write_text(content, encoding="utf-8") + return path + + +def 
test_python_add_fastapi_route_appends_get_endpoint(tmp_path): + route_path = _write_route( + tmp_path, + """from fastapi import APIRouter + +router = APIRouter() +""", + ) + + result = PythonAddFastApiRouteExecutor().execute( + _task(), tmp_path, _stack(tmp_path) + ) + + assert result.passed is True + assert result.fallback_to_llm is False + assert result.files_modified == [route_path] + updated = route_path.read_text(encoding="utf-8") + ast.parse(updated) + assert '@router.get("/users/{id}")' in updated + assert "async def get_user(id: str) -> dict[str, str]:" in updated + assert 'return {"id": id}' in updated + + +def test_python_add_fastapi_route_adds_router_scaffold_when_missing(tmp_path): + route_path = _write_route(tmp_path, "from fastapi import Depends\n") + + result = PythonAddFastApiRouteExecutor().execute( + _task("Add POST `/users` route for user creation"), + tmp_path, + _stack(tmp_path), + ) + + assert result.passed is True + updated = route_path.read_text(encoding="utf-8") + ast.parse(updated) + assert "from fastapi import Depends, APIRouter" in updated + assert "router = APIRouter()" in updated + assert '@router.post("/users")' in updated + assert "async def create_user() -> dict[str, str]:" in updated + + +def test_python_add_fastapi_route_falls_back_for_ambiguous_route(tmp_path): + route_path = _write_route(tmp_path, "router = object()\n") + original = route_path.read_text(encoding="utf-8") + + result = PythonAddFastApiRouteExecutor().execute( + _task("Add an endpoint for users"), + tmp_path, + _stack(tmp_path), + ) + + assert result.passed is False + assert result.fallback_to_llm is True + assert "unsupported FastAPI route task shape" in result.log + assert route_path.read_text(encoding="utf-8") == original + + +def test_default_registry_includes_python_add_fastapi_route_executor(): + assert any( + isinstance(executor, PythonAddFastApiRouteExecutor) + for executor in codegen_registry.registered_executors() + ) diff --git 
a/tests/python/test_scratch_codegen_next_route.py b/tests/python/test_scratch_codegen_next_route.py new file mode 100644 index 0000000..7fd0808 --- /dev/null +++ b/tests/python/test_scratch_codegen_next_route.py @@ -0,0 +1,189 @@ +"""Tests for deterministic Next.js route scratch codegen.""" + +from __future__ import annotations + +import json +import shutil +import subprocess +from pathlib import Path + +from simplicio.scratch.codegen import TypeScriptAddNextRouteExecutor +from simplicio.scratch.codegen.typescript_next_route import _ts_morph_env +from simplicio.scratch.codegen import registry as codegen_registry +from simplicio.scratch.plan_schema import Task +from simplicio.scratch.stack_registry import Stack + + +def _stack(tmp_path: Path) -> Stack: + return Stack( + slug="ts-nextjs", + path=tmp_path, + meta={"language": "TypeScript 5", "framework": "Next.js 14 (app router)"}, + ) + + +def _task(goal: str = "Create Next.js route handlers for Unit CRUD") -> Task: + return Task( + id="T02-next-route", + goal=goal, + target="src/app/api/units/route.ts", + criteria="- exports GET and POST handlers\n- returns JSON responses", + constraints="- no external dependencies", + verify="pnpm vitest run src/app/api/units/route.test.ts", + ) + + +def test_typescript_add_next_route_executor_creates_json_handlers(tmp_path): + executor = TypeScriptAddNextRouteExecutor() + result = executor.execute(_task(), tmp_path, _stack(tmp_path)) + + route = tmp_path / "src/app/api/units/route.ts" + assert result.passed is True + assert result.fallback_to_llm is False + assert result.files_modified == [route] + generated = route.read_text(encoding="utf-8") + assert "export async function GET(): Promise" in generated + assert "return Response.json(units);" in generated + assert ( + "export async function POST(request: Request): Promise" in generated + ) + assert "return Response.json(body, { status: 201 });" in generated + + +def 
test_typescript_add_next_route_executor_outputs_runnable_json_handlers(tmp_path): + result = TypeScriptAddNextRouteExecutor().execute( + _task(), tmp_path, _stack(tmp_path) + ) + assert result.passed is True + + route = tmp_path / "src/app/api/units/route.ts" + ok, env_or_log = _ts_morph_env(tmp_path) + assert ok, env_or_log + node = shutil.which("node") or shutil.which("node.exe") + assert node is not None + + proc = subprocess.run( + [ + node, + "-e", + _ROUTE_RUNTIME_CHECK, + str(route), + ], + capture_output=True, + text=True, + env=env_or_log, + timeout=30, + ) + + assert proc.returncode == 0, proc.stderr + assert json.loads(proc.stdout) == { + "get": [], + "post": {"name": "Unit 1"}, + "postStatus": 201, + } + + +def test_typescript_add_next_route_executor_appends_missing_handler(tmp_path): + route = tmp_path / "src/app/api/units/route.ts" + route.parent.mkdir(parents=True) + route.write_text( + """export async function GET(): Promise { + return Response.json([]); +} +""", + encoding="utf-8", + ) + + result = TypeScriptAddNextRouteExecutor().execute( + _task("Add POST endpoint to `/api/units` route"), + tmp_path, + _stack(tmp_path), + ) + + generated = route.read_text(encoding="utf-8") + assert result.passed is True + assert generated.count("export async function GET") == 1 + assert ( + "export async function POST(request: Request): Promise" in generated + ) + + +def test_typescript_add_next_route_executor_falls_back_for_non_route_target(tmp_path): + result = TypeScriptAddNextRouteExecutor().execute( + Task( + id="T02-next-route", + goal="Create Next.js route handlers for Unit CRUD", + target="src/app/units/page.tsx", + criteria="- no route file", + constraints="", + verify="pnpm vitest run", + ), + tmp_path, + _stack(tmp_path), + ) + + assert result.passed is False + assert result.fallback_to_llm is True + assert "unsupported Next.js route task shape" in result.log + + +def test_default_registry_includes_typescript_next_route_executor(): + assert any( + 
isinstance(executor, TypeScriptAddNextRouteExecutor) + for executor in codegen_registry.registered_executors() + ) + + +_ROUTE_RUNTIME_CHECK = r""" +const fs = require("fs"); +const ts = require("typescript"); +const vm = require("vm"); + +(async () => { + const source = fs.readFileSync(process.argv[1], "utf8"); + const program = ts.createProgram([process.argv[1]], { + noEmit: true, + strict: true, + target: ts.ScriptTarget.ES2022, + module: ts.ModuleKind.CommonJS, + lib: ["lib.es2022.d.ts", "lib.dom.d.ts"], + skipLibCheck: true, + }); + const diagnostics = ts.getPreEmitDiagnostics(program); + if (diagnostics.length > 0) { + console.error(ts.formatDiagnosticsWithColorAndContext(diagnostics, { + getCanonicalFileName: (fileName) => fileName, + getCurrentDirectory: () => process.cwd(), + getNewLine: () => "\n", + })); + process.exit(1); + } + + const output = ts.transpileModule(source, { + compilerOptions: { + target: ts.ScriptTarget.ES2022, + module: ts.ModuleKind.CommonJS, + }, + }).outputText; + const context = { + exports: {}, + Response, + Request, + }; + vm.runInNewContext(output, context); + const getResponse = await context.exports.GET(); + const postResponse = await context.exports.POST(new Request("https://example.test/api/units", { + method: "POST", + body: JSON.stringify({ name: "Unit 1" }), + headers: { "content-type": "application/json" }, + })); + console.log(JSON.stringify({ + get: await getResponse.json(), + post: await postResponse.json(), + postStatus: postResponse.status, + })); +})().catch((error) => { + console.error(error && error.stack ? 
error.stack : String(error)); + process.exit(1); +}); +""" diff --git a/tests/python/test_scratch_codegen_orm.py b/tests/python/test_scratch_codegen_orm.py new file mode 100644 index 0000000..0f52043 --- /dev/null +++ b/tests/python/test_scratch_codegen_orm.py @@ -0,0 +1,94 @@ +"""Tests for deterministic SQLAlchemy ORM scratch codegen.""" + +from __future__ import annotations + +import ast +from pathlib import Path + +from simplicio.scratch.codegen import PythonAddOrmFieldExecutor +from simplicio.scratch.codegen import registry as codegen_registry +from simplicio.scratch.plan_schema import Task +from simplicio.scratch.stack_registry import Stack + + +def _stack(tmp_path: Path) -> Stack: + return Stack( + slug="py-fastapi", + path=tmp_path, + meta={"language": "Python", "framework": "FastAPI"}, + ) + + +def _task(goal: str = "Add email: Mapped[str] field to User model") -> Task: + return Task( + id="T01-db-model", + goal=goal, + target="src/db/models.py", + criteria="- User has email: Mapped[str]", + constraints="- use SQLAlchemy 2.0 declarative style", + verify="pytest tests/db/test_models.py -q", + ) + + +def _write_models(tmp_path: Path, content: str) -> Path: + path = tmp_path / "src/db/models.py" + path.parent.mkdir(parents=True) + path.write_text(content, encoding="utf-8") + return path + + +def test_python_add_orm_field_adds_email_to_sqlalchemy_model(tmp_path): + models_path = _write_models( + tmp_path, + """from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + pass + + +class User(Base): + __tablename__ = "users" + + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] +""", + ) + + executor = PythonAddOrmFieldExecutor() + task = _task() + result = executor.execute(task, tmp_path, _stack(tmp_path)) + + assert result.passed is True + assert result.fallback_to_llm is False + assert result.files_modified == [models_path] + updated = models_path.read_text(encoding="utf-8") + ast.parse(updated) + 
assert " name: Mapped[str]\n email: Mapped[str]\n" in updated + assert ( + "from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column" in updated + ) + + +def test_python_add_orm_field_falls_back_when_model_shape_is_unsupported(tmp_path): + models_path = _write_models( + tmp_path, + """class User: + pass +""", + ) + original = models_path.read_text(encoding="utf-8") + + result = PythonAddOrmFieldExecutor().execute(_task(), tmp_path, _stack(tmp_path)) + + assert result.passed is False + assert result.fallback_to_llm is True + assert "not a SQLAlchemy model" in result.log + assert models_path.read_text(encoding="utf-8") == original + + +def test_default_registry_includes_python_add_orm_field_executor(): + assert any( + isinstance(executor, PythonAddOrmFieldExecutor) + for executor in codegen_registry.registered_executors() + ) diff --git a/tests/python/test_scratch_codegen_pydantic.py b/tests/python/test_scratch_codegen_pydantic.py new file mode 100644 index 0000000..6d1e02a --- /dev/null +++ b/tests/python/test_scratch_codegen_pydantic.py @@ -0,0 +1,157 @@ +"""Tests for deterministic Pydantic schema scratch codegen.""" + +from __future__ import annotations + +import ast +from pathlib import Path + +from simplicio.scratch.codegen import PythonAddPydanticSchemaExecutor +from simplicio.scratch.codegen import registry as codegen_registry +from simplicio.scratch.plan_schema import Task +from simplicio.scratch.stack_registry import Stack + + +def _stack(tmp_path: Path) -> Stack: + return Stack( + slug="py-fastapi", + path=tmp_path, + meta={"language": "Python", "framework": "FastAPI"}, + ) + + +def _task( + goal: str = "Create Pydantic schemas for User create, update, and read flows.", +) -> Task: + return Task( + id="T02-api-schemas", + goal=goal, + target="src/api/schemas/user.py", + criteria=( + "- UserCreate, UserUpdate, and UserRead schemas exist\n" + "- optional update fields are supported" + ), + constraints="- keep schemas framework-agnostic", + 
verify="pytest tests/api/test_users.py -q", + ) + + +def _write_model(tmp_path: Path, content: str, name: str = "user.py") -> Path: + path = tmp_path / f"src/db/{name}" + path.parent.mkdir(parents=True) + path.write_text(content, encoding="utf-8") + return path + + +def test_python_add_pydantic_schema_derives_crud_schemas_from_model(tmp_path): + _write_model( + tmp_path, + """from datetime import datetime + +from sqlalchemy import func +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + pass + + +class User(Base): + __tablename__ = "users" + + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] + email: Mapped[str | None] = mapped_column(nullable=True) + created_at: Mapped[datetime] = mapped_column(default=func.now()) +""", + ) + + executor = PythonAddPydanticSchemaExecutor() + result = executor.execute(_task(), tmp_path, _stack(tmp_path)) + + schema_path = tmp_path / "src/api/schemas/user.py" + assert result.passed is True + assert result.fallback_to_llm is False + assert result.files_modified == [schema_path] + + generated = schema_path.read_text(encoding="utf-8") + ast.parse(generated) + assert "from __future__ import annotations" in generated + assert "from datetime import datetime" in generated + assert "from pydantic import BaseModel, ConfigDict" in generated + assert ( + "class UserCreate(BaseModel):\n name: str\n email: str | None = None" + in generated + ) + assert ( + "class UserUpdate(BaseModel):\n" + " name: str | None = None\n" + " email: str | None = None" + ) in generated + assert ( + "class UserRead(BaseModel):\n" + " model_config = ConfigDict(from_attributes=True)\n\n" + " id: int\n" + " name: str\n" + " email: str | None\n" + " created_at: datetime" + ) in generated + + +def test_python_add_pydantic_schema_appends_missing_classes(tmp_path): + _write_model( + tmp_path, + """from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + pass + 
+ +class User(Base): + __tablename__ = "users" + + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] +""", + ) + schema_path = tmp_path / "src/api/schemas/user.py" + schema_path.parent.mkdir(parents=True) + schema_path.write_text( + """from pydantic import BaseModel + + +class UserCreate(BaseModel): + name: str +""", + encoding="utf-8", + ) + + result = PythonAddPydanticSchemaExecutor().execute( + _task(), tmp_path, _stack(tmp_path) + ) + + assert result.passed is True + updated = schema_path.read_text(encoding="utf-8") + ast.parse(updated) + assert "from pydantic import BaseModel, ConfigDict" in updated + assert updated.count("class UserCreate(BaseModel):") == 1 + assert "class UserUpdate(BaseModel):" in updated + assert "class UserRead(BaseModel):" in updated + + +def test_python_add_pydantic_schema_falls_back_when_model_is_missing(tmp_path): + result = PythonAddPydanticSchemaExecutor().execute( + _task(), tmp_path, _stack(tmp_path) + ) + + assert result.passed is False + assert result.fallback_to_llm is True + assert "unsupported Pydantic schema task shape" in result.log + assert not (tmp_path / "src/api/schemas/user.py").exists() + + +def test_default_registry_includes_python_add_pydantic_schema_executor(): + assert any( + isinstance(executor, PythonAddPydanticSchemaExecutor) + for executor in codegen_registry.registered_executors() + ) diff --git a/tests/python/test_scratch_codegen_pytest.py b/tests/python/test_scratch_codegen_pytest.py new file mode 100644 index 0000000..62dc683 --- /dev/null +++ b/tests/python/test_scratch_codegen_pytest.py @@ -0,0 +1,108 @@ +"""Tests for deterministic pytest scratch codegen.""" + +from __future__ import annotations + +import ast +import subprocess +import sys +from pathlib import Path + +from simplicio.scratch.codegen import PythonAddPytestTestExecutor +from simplicio.scratch.codegen import registry as codegen_registry +from simplicio.scratch.plan_schema import Task +from simplicio.scratch.stack_registry 
import Stack + + +def _stack(tmp_path: Path) -> Stack: + return Stack( + slug="py-fastapi", + path=tmp_path, + meta={"language": "Python", "framework": "FastAPI"}, + ) + + +def _task( + goal: str = ( + "Generate a happy-path pytest for function double in src/utils/math_ops.py" + ), +) -> Task: + return Task( + id="T02-pytest", + goal=goal, + target="tests/unit/test_math_ops.py", + criteria="- imports the function under test\n- has a sane assert", + constraints="- use pytest", + verify="pytest tests/unit/test_math_ops.py -q", + ) + + +def _write_pyproject(tmp_path: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + """[tool.pytest.ini_options] +pythonpath = ["src"] +""", + encoding="utf-8", + ) + + +def _write_source(tmp_path: Path) -> Path: + source = tmp_path / "src/utils/math_ops.py" + source.parent.mkdir(parents=True) + source.write_text( + """def double(value: int) -> int: + return value * 2 +""", + encoding="utf-8", + ) + return source + + +def test_python_add_pytest_test_generates_runnable_happy_path(tmp_path): + _write_pyproject(tmp_path) + _write_source(tmp_path) + + executor = PythonAddPytestTestExecutor() + result = executor.execute(_task(), tmp_path, _stack(tmp_path)) + + test_path = tmp_path / "tests/unit/test_math_ops.py" + assert result.passed is True + assert result.fallback_to_llm is False + assert result.files_modified == [test_path] + + generated = test_path.read_text(encoding="utf-8") + ast.parse(generated) + assert "from utils.math_ops import double" in generated + assert "def test_double_happy_path() -> None:" in generated + assert "result = double(1)" in generated + assert "assert isinstance(result, int)" in generated + + completed = subprocess.run( + [sys.executable, "-m", "pytest", "-q", "tests/unit/test_math_ops.py"], + cwd=tmp_path, + capture_output=True, + text=True, + timeout=30, + ) + assert completed.returncode == 0, completed.stdout + completed.stderr + + +def 
test_python_add_pytest_test_falls_back_when_function_cannot_be_resolved(tmp_path): + _write_pyproject(tmp_path) + + result = PythonAddPytestTestExecutor().execute( + _task("Generate a happy-path pytest for function missing in src/missing.py"), + tmp_path, + _stack(tmp_path), + ) + + assert result.passed is False + assert result.fallback_to_llm is True + assert "could not resolve" in result.log + assert not (tmp_path / "tests/unit/test_math_ops.py").exists() + + +def test_default_registry_includes_python_add_pytest_test_executor(): + assert any( + isinstance(executor, PythonAddPytestTestExecutor) + for executor in codegen_registry.registered_executors() + ) From e22478b6cae347d018a32b2a883381d32c1c7695 Mon Sep 17 00:00:00 2001 From: Wesley Simplicio Date: Sat, 30 May 2026 06:26:22 -0300 Subject: [PATCH 3/3] feat: expand scratch reduction evidence --- bench/results_scratch_codegen.json | 1293 +++++++++++++++++ bench/results_scratch_codegen.md | 75 + bench/results_static_fixers.json | 633 ++++++++ bench/results_static_fixers.md | 73 + bench/run_scratch_codegen.py | 485 +++++++ bench/run_static_fixers.py | 313 ++++ pyproject.toml | 10 +- simplicio/_cache.py | 23 +- simplicio/providers.py | 204 ++- simplicio/scratch/executor.py | 71 +- simplicio/scratch/plan_schema.py | 9 +- simplicio/scratch/planner.py | 11 +- .../templates/stacks/php-laravel/README.md | 35 + .../templates/stacks/php-laravel/practices.md | 30 + .../templates/stacks/php-laravel/stack.json | 13 + .../stacks/php-laravel/tree/README.md | 11 + .../templates/stacks/php-laravel/tree/artisan | 14 + .../stacks/php-laravel/tree/bootstrap/app.php | 19 + .../stacks/php-laravel/tree/composer.json | 29 + .../stacks/php-laravel/tree/phpunit.xml | 17 + .../stacks/php-laravel/tree/routes/api.php | 7 + .../php-laravel/tree/routes/console.php | 7 + .../tree/tests/Feature/HealthTest.php | 15 + .../php-laravel/tree/tests/TestCase.php | 10 + .../templates/stacks/php-laravel/verify.json | 6 + 
.../templates/stacks/rust-axum/README.md | 32 + .../templates/stacks/rust-axum/practices.md | 30 + .../templates/stacks/rust-axum/stack.json | 13 + .../stacks/rust-axum/tree/Cargo.toml | 12 + .../templates/stacks/rust-axum/tree/README.md | 11 + .../stacks/rust-axum/tree/src/main.rs | 43 + .../templates/stacks/rust-axum/verify.json | 6 + tests/python/test_cache.py | 50 +- tests/python/test_scratch.py | 135 ++ tests/python/test_scratch_codegen.py | 38 + tests/python/test_scratch_codegen_bench.py | 47 + tests/python/test_static_fixers_bench.py | 38 + 37 files changed, 3803 insertions(+), 65 deletions(-) create mode 100644 bench/results_scratch_codegen.json create mode 100644 bench/results_scratch_codegen.md create mode 100644 bench/results_static_fixers.json create mode 100644 bench/results_static_fixers.md create mode 100644 bench/run_scratch_codegen.py create mode 100644 bench/run_static_fixers.py create mode 100644 simplicio/templates/stacks/php-laravel/README.md create mode 100644 simplicio/templates/stacks/php-laravel/practices.md create mode 100644 simplicio/templates/stacks/php-laravel/stack.json create mode 100644 simplicio/templates/stacks/php-laravel/tree/README.md create mode 100644 simplicio/templates/stacks/php-laravel/tree/artisan create mode 100644 simplicio/templates/stacks/php-laravel/tree/bootstrap/app.php create mode 100644 simplicio/templates/stacks/php-laravel/tree/composer.json create mode 100644 simplicio/templates/stacks/php-laravel/tree/phpunit.xml create mode 100644 simplicio/templates/stacks/php-laravel/tree/routes/api.php create mode 100644 simplicio/templates/stacks/php-laravel/tree/routes/console.php create mode 100644 simplicio/templates/stacks/php-laravel/tree/tests/Feature/HealthTest.php create mode 100644 simplicio/templates/stacks/php-laravel/tree/tests/TestCase.php create mode 100644 simplicio/templates/stacks/php-laravel/verify.json create mode 100644 simplicio/templates/stacks/rust-axum/README.md create mode 100644 
simplicio/templates/stacks/rust-axum/practices.md create mode 100644 simplicio/templates/stacks/rust-axum/stack.json create mode 100644 simplicio/templates/stacks/rust-axum/tree/Cargo.toml create mode 100644 simplicio/templates/stacks/rust-axum/tree/README.md create mode 100644 simplicio/templates/stacks/rust-axum/tree/src/main.rs create mode 100644 simplicio/templates/stacks/rust-axum/verify.json create mode 100644 tests/python/test_scratch_codegen_bench.py create mode 100644 tests/python/test_static_fixers_bench.py diff --git a/bench/results_scratch_codegen.json b/bench/results_scratch_codegen.json new file mode 100644 index 0000000..1c66bf9 --- /dev/null +++ b/bench/results_scratch_codegen.json @@ -0,0 +1,1293 @@ +{ + "benchmark": "scratch-codegen", + "cases": [ + { + "actual_executor": "python-add-orm-field", + "duration_ms": 135, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r01\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 135, + "avg_llm_ms": 0, + "avg_task_ms": 135, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r01", + "run_index": 1, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 4, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r01\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 4, + "avg_llm_ms": 0, + "avg_task_ms": 4, + "codegen_share": 1.0, + "tasks_codegen": 1, + 
"tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r01", + "run_index": 1, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 1, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r01\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 1, + "avg_llm_ms": 0, + "avg_task_ms": 1, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r01", + "run_index": 1, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 4, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r01\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 4, + "avg_llm_ms": 0, + "avg_task_ms": 4, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r01", + "run_index": 1, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 325, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js 
route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r01\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 325, + "avg_llm_ms": 0, + "avg_task_ms": 325, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r01", + "run_index": 1, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r02\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r02", + "run_index": 2, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r02\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r02", + "run_index": 2, + "stack": "py-fastapi", + 
"task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r02\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r02", + "run_index": 2, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r02\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r02", + "run_index": 2, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 316, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r02\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 316, + "avg_llm_ms": 0, + "avg_task_ms": 316, + "codegen_share": 
1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r02", + "run_index": 2, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r03\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r03", + "run_index": 3, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r03\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r03", + "run_index": 3, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 1, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET 
/users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r03\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 1, + "avg_llm_ms": 0, + "avg_task_ms": 1, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r03", + "run_index": 3, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r03\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r03", + "run_index": 3, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 327, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r03\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 327, + "avg_llm_ms": 0, + "avg_task_ms": 327, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r03", + "run_index": 3, + 
"stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r04\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r04", + "run_index": 4, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r04\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r04", + "run_index": 4, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 1, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r04\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 1, + "avg_llm_ms": 0, + "avg_task_ms": 1, + "codegen_share": 1.0, + "tasks_codegen": 1, + 
"tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r04", + "run_index": 4, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r04\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r04", + "run_index": 4, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 389, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r04\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 389, + "avg_llm_ms": 0, + "avg_task_ms": 389, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r04", + "run_index": 4, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + 
"log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r05\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r05", + "run_index": 5, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r05\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r05", + "run_index": 5, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 1, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r05\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 1, + "avg_llm_ms": 0, + "avg_task_ms": 1, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r05", + "run_index": 5, + "stack": "py-fastapi", + 
"task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r05\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r05", + "run_index": 5, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 325, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r05\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 325, + "avg_llm_ms": 0, + "avg_task_ms": 325, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r05", + "run_index": 5, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r06\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + 
"tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r06", + "run_index": 6, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r06\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r06", + "run_index": 6, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 1, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r06\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 1, + "avg_llm_ms": 0, + "avg_task_ms": 1, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r06", + "run_index": 6, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated 
pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r06\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r06", + "run_index": 6, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 306, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r06\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 306, + "avg_llm_ms": 0, + "avg_task_ms": 306, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r06", + "run_index": 6, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r07\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r07", + "run_index": 7, + "stack": 
"py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r07\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r07", + "run_index": 7, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 1, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r07\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 1, + "avg_llm_ms": 0, + "avg_task_ms": 1, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r07", + "run_index": 7, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r07\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, 
+ "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r07", + "run_index": 7, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 320, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r07\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 320, + "avg_llm_ms": 0, + "avg_task_ms": 320, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r07", + "run_index": 7, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r08\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r08", + "run_index": 8, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": 
"generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r08\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r08", + "run_index": 8, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r08\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r08", + "run_index": 8, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r08\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r08", + 
"run_index": 8, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 332, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r08\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 332, + "avg_llm_ms": 0, + "avg_task_ms": 332, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r08", + "run_index": 8, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 3, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r09\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 3, + "avg_llm_ms": 0, + "avg_task_ms": 3, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r09", + "run_index": 9, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r09\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + 
"codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pydantic-schema-r09", + "run_index": 9, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 1, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r09\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 1, + "avg_llm_ms": 0, + "avg_task_ms": 1, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r09", + "run_index": 9, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r09\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r09", + "run_index": 9, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 318, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + 
"expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r09\\src\\app\\api\\units\\route.ts", + "metrics": { + "avg_codegen_ms": 318, + "avg_llm_ms": 0, + "avg_task_ms": 318, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r09", + "run_index": 9, + "stack": "ts-nextjs", + "task_passed": true + }, + { + "actual_executor": "python-add-orm-field", + "duration_ms": 4, + "execution_mode": "codegen", + "expected_executor": "python-add-orm-field", + "expected_executor_match": true, + "log_tail": "added User.email: Mapped[str] with libcst\nfiles_modified=$WORK_DIR\\projects\\python-orm-field-r10\\src\\db\\models.py", + "metrics": { + "avg_codegen_ms": 4, + "avg_llm_ms": 0, + "avg_task_ms": 4, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-orm-field", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-orm-field-r10", + "run_index": 10, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pydantic-schema", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pydantic-schema", + "expected_executor_match": true, + "log_tail": "generated Pydantic schemas with libcst for User from src\\db\\user.py\nfiles_modified=$WORK_DIR\\projects\\python-pydantic-schema-r10\\src\\api\\schemas\\user.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pydantic-schema", + "passed": true, + "project_dir": 
"$WORK_DIR/projects/python-pydantic-schema-r10", + "run_index": 10, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-fastapi-route", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-fastapi-route", + "expected_executor_match": true, + "log_tail": "added FastAPI GET /users/{id} route with libcst\nfiles_modified=$WORK_DIR\\projects\\python-fastapi-route-r10\\src\\api\\users.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-fastapi-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-fastapi-route-r10", + "run_index": 10, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "python-add-pytest-test", + "duration_ms": 2, + "execution_mode": "codegen", + "expected_executor": "python-add-pytest-test", + "expected_executor_match": true, + "log_tail": "generated pytest with libcst tests\\unit\\test_math_ops.py for utils.math_ops.double\nfiles_modified=$WORK_DIR\\projects\\python-pytest-test-r10\\tests\\unit\\test_math_ops.py", + "metrics": { + "avg_codegen_ms": 2, + "avg_llm_ms": 0, + "avg_task_ms": 2, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "python-pytest-test", + "passed": true, + "project_dir": "$WORK_DIR/projects/python-pytest-test-r10", + "run_index": 10, + "stack": "py-fastapi", + "task_passed": true + }, + { + "actual_executor": "typescript-add-next-route", + "duration_ms": 332, + "execution_mode": "codegen", + "expected_executor": "typescript-add-next-route", + "expected_executor_match": true, + "log_tail": "generated Next.js route handlers with ts-morph GET, POST for units\nfiles_modified=$WORK_DIR\\projects\\typescript-next-route-r10\\src\\app\\api\\units\\route.ts", + 
"metrics": { + "avg_codegen_ms": 332, + "avg_llm_ms": 0, + "avg_task_ms": 332, + "codegen_share": 1.0, + "tasks_codegen": 1, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "tasks_total": 1 + }, + "name": "typescript-next-route", + "passed": true, + "project_dir": "$WORK_DIR/projects/typescript-next-route-r10", + "run_index": 10, + "stack": "ts-nextjs", + "task_passed": true + } + ], + "environment": { + "platform": "Windows-11-10.0.22631-SP0", + "python": "3.14.3" + }, + "include_typescript": true, + "repeat": 10, + "scope": "synthetic deterministic executor benchmark; no LLM calls; does not replace the full 50-scratch release gate", + "summary": { + "avg_codegen_ms": 70, + "codegen_share": 1.0, + "elapsed_s": 3.782, + "expected_executor_match_rate": 1.0, + "failed_cases": 0, + "llm_calls": 0, + "missing_release_evidence": [ + "LLM baseline pass-rate and latency comparison", + "50 real scratch goals across the release corpus", + "planner cache hit-rate measured across cold/warm scratch runs" + ], + "pass_rate": 1.0, + "passed_cases": 50, + "planner_calls": 0, + "release_gates": { + "executor_pass_rate_100": true, + "fifty_runs": true, + "latency_reduction_ge_50": null, + "llm_baseline_present": false, + "mechanical_share_ge_30": true + }, + "tasks_codegen": 50, + "tasks_failed": 0, + "tasks_llm": 0, + "tasks_skipped": 0, + "total_cases": 50, + "total_tasks": 50 + }, + "work_dir": "$WORK_DIR", + "work_dir_owned_by_runner": false +} \ No newline at end of file diff --git a/bench/results_scratch_codegen.md b/bench/results_scratch_codegen.md new file mode 100644 index 0000000..3e18377 --- /dev/null +++ b/bench/results_scratch_codegen.md @@ -0,0 +1,75 @@ +# Scratch Codegen Benchmark + +synthetic deterministic executor benchmark; no LLM calls; does not replace the full 50-scratch release gate + +## Summary + +- cases: 50/50 passed +- codegen share: 100.00% +- expected executor match: 100.00% +- avg codegen latency: 70 ms +- planner calls: 0 +- llm calls: 
0 + +## Release Gate Status + +- fifty_runs: True +- mechanical_share_ge_30: True +- executor_pass_rate_100: True +- llm_baseline_present: False +- latency_reduction_ge_50: None + +## Cases + +| case | stack | executor | mode | passed | duration_ms | +| --- | --- | --- | --- | --- | ---: | +| python-orm-field r01 | py-fastapi | python-add-orm-field | codegen | True | 135 | +| python-pydantic-schema r01 | py-fastapi | python-add-pydantic-schema | codegen | True | 4 | +| python-fastapi-route r01 | py-fastapi | python-add-fastapi-route | codegen | True | 1 | +| python-pytest-test r01 | py-fastapi | python-add-pytest-test | codegen | True | 4 | +| typescript-next-route r01 | ts-nextjs | typescript-add-next-route | codegen | True | 325 | +| python-orm-field r02 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| python-pydantic-schema r02 | py-fastapi | python-add-pydantic-schema | codegen | True | 2 | +| python-fastapi-route r02 | py-fastapi | python-add-fastapi-route | codegen | True | 2 | +| python-pytest-test r02 | py-fastapi | python-add-pytest-test | codegen | True | 2 | +| typescript-next-route r02 | ts-nextjs | typescript-add-next-route | codegen | True | 316 | +| python-orm-field r03 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| python-pydantic-schema r03 | py-fastapi | python-add-pydantic-schema | codegen | True | 2 | +| python-fastapi-route r03 | py-fastapi | python-add-fastapi-route | codegen | True | 1 | +| python-pytest-test r03 | py-fastapi | python-add-pytest-test | codegen | True | 2 | +| typescript-next-route r03 | ts-nextjs | typescript-add-next-route | codegen | True | 327 | +| python-orm-field r04 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| python-pydantic-schema r04 | py-fastapi | python-add-pydantic-schema | codegen | True | 3 | +| python-fastapi-route r04 | py-fastapi | python-add-fastapi-route | codegen | True | 1 | +| python-pytest-test r04 | py-fastapi | python-add-pytest-test | codegen | True | 3 | 
+| typescript-next-route r04 | ts-nextjs | typescript-add-next-route | codegen | True | 389 | +| python-orm-field r05 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| python-pydantic-schema r05 | py-fastapi | python-add-pydantic-schema | codegen | True | 3 | +| python-fastapi-route r05 | py-fastapi | python-add-fastapi-route | codegen | True | 1 | +| python-pytest-test r05 | py-fastapi | python-add-pytest-test | codegen | True | 2 | +| typescript-next-route r05 | ts-nextjs | typescript-add-next-route | codegen | True | 325 | +| python-orm-field r06 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| python-pydantic-schema r06 | py-fastapi | python-add-pydantic-schema | codegen | True | 2 | +| python-fastapi-route r06 | py-fastapi | python-add-fastapi-route | codegen | True | 1 | +| python-pytest-test r06 | py-fastapi | python-add-pytest-test | codegen | True | 2 | +| typescript-next-route r06 | ts-nextjs | typescript-add-next-route | codegen | True | 306 | +| python-orm-field r07 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| python-pydantic-schema r07 | py-fastapi | python-add-pydantic-schema | codegen | True | 2 | +| python-fastapi-route r07 | py-fastapi | python-add-fastapi-route | codegen | True | 1 | +| python-pytest-test r07 | py-fastapi | python-add-pytest-test | codegen | True | 2 | +| typescript-next-route r07 | ts-nextjs | typescript-add-next-route | codegen | True | 320 | +| python-orm-field r08 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| python-pydantic-schema r08 | py-fastapi | python-add-pydantic-schema | codegen | True | 2 | +| python-fastapi-route r08 | py-fastapi | python-add-fastapi-route | codegen | True | 2 | +| python-pytest-test r08 | py-fastapi | python-add-pytest-test | codegen | True | 3 | +| typescript-next-route r08 | ts-nextjs | typescript-add-next-route | codegen | True | 332 | +| python-orm-field r09 | py-fastapi | python-add-orm-field | codegen | True | 3 | +| 
python-pydantic-schema r09 | py-fastapi | python-add-pydantic-schema | codegen | True | 2 | +| python-fastapi-route r09 | py-fastapi | python-add-fastapi-route | codegen | True | 1 | +| python-pytest-test r09 | py-fastapi | python-add-pytest-test | codegen | True | 2 | +| typescript-next-route r09 | ts-nextjs | typescript-add-next-route | codegen | True | 318 | +| python-orm-field r10 | py-fastapi | python-add-orm-field | codegen | True | 4 | +| python-pydantic-schema r10 | py-fastapi | python-add-pydantic-schema | codegen | True | 2 | +| python-fastapi-route r10 | py-fastapi | python-add-fastapi-route | codegen | True | 2 | +| python-pytest-test r10 | py-fastapi | python-add-pytest-test | codegen | True | 2 | +| typescript-next-route r10 | ts-nextjs | typescript-add-next-route | codegen | True | 332 | diff --git a/bench/results_static_fixers.json b/bench/results_static_fixers.json new file mode 100644 index 0000000..03a48d4 --- /dev/null +++ b/bench/results_static_fixers.json @@ -0,0 +1,633 @@ +{ + "benchmark": "static-fixers", + "cases": [ + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-01", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-02", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-03", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-04", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + 
{ + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-05", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-06", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-07", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-08", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-09", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-10", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-11", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-12", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": 
"missing-pip-13", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-14", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-15", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-16", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-17", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-18", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-19", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-20", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-21", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + 
{ + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-22", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-23", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-24", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-25", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-26", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-27", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-28", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-29", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": 
"missing-pip-30", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-31", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-32", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-33", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-34", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-35", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-36", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-37", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-38", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + 
{ + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-39", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": true, + "fixers": [ + "missing-pip-package" + ], + "name": "missing-pip-40", + "passed": true, + "resolvable": true, + "retry_calls_saved": 1, + "with_fixer_llm_calls": 1 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-01", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-02", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-03", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-04", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-05", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-06", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-07", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + 
"with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-08", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-09", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + }, + { + "baseline_llm_calls": 2, + "fixed_before_llm_retry": false, + "fixers": [ + "none" + ], + "name": "assertion-10", + "passed": true, + "resolvable": false, + "retry_calls_saved": 0, + "with_fixer_llm_calls": 2 + } + ], + "environment": { + "platform": "Windows-11-10.0.22631-SP0", + "python": "3.14.3" + }, + "scope": "synthetic verify-loop fixer benchmark; package installs and LLM generation are faked; does not replace the full 50-scratch gate", + "summary": { + "baseline_llm_calls": 100, + "elapsed_s": 0.199, + "fixer_resolved_before_retry": 40, + "fixer_resolved_rate": 0.8, + "missing_release_evidence": [ + "real install/import/lint failures from 50 scratch runs", + "non-faked package manager execution", + "comparison across actual scratch reports" + ], + "passed_cases": 50, + "release_gates": { + "fifty_cases": true, + "fixer_resolved_ge_80": true, + "real_scratch_corpus": false, + "retry_calls_down_ge_30": true + }, + "retry_call_reduction": 0.4, + "total_cases": 50, + "with_fixer_llm_calls": 60 + }, + "work_dir": "$WORK_DIR", + "work_dir_owned_by_runner": false +} \ No newline at end of file diff --git a/bench/results_static_fixers.md b/bench/results_static_fixers.md new file mode 100644 index 0000000..aae8cbb --- /dev/null +++ b/bench/results_static_fixers.md @@ -0,0 +1,73 @@ +# Static Fixers Benchmark + +synthetic verify-loop fixer benchmark; package installs and LLM generation are faked; does not replace the full 50-scratch gate + +## Summary + +- cases: 50/50 passed +- fixed before LLM 
retry: 80.00% +- baseline LLM calls: 100 +- with-fixer LLM calls: 60 +- retry-call reduction: 40.00% + +## Release Gate Status + +- fifty_cases: True +- fixer_resolved_ge_80: True +- retry_calls_down_ge_30: True +- real_scratch_corpus: False + +## Cases + +| case | fixed_before_retry | baseline_calls | with_fixer_calls | passed | +| --- | --- | ---: | ---: | --- | +| missing-pip-01 | True | 2 | 1 | True | +| missing-pip-02 | True | 2 | 1 | True | +| missing-pip-03 | True | 2 | 1 | True | +| missing-pip-04 | True | 2 | 1 | True | +| missing-pip-05 | True | 2 | 1 | True | +| missing-pip-06 | True | 2 | 1 | True | +| missing-pip-07 | True | 2 | 1 | True | +| missing-pip-08 | True | 2 | 1 | True | +| missing-pip-09 | True | 2 | 1 | True | +| missing-pip-10 | True | 2 | 1 | True | +| missing-pip-11 | True | 2 | 1 | True | +| missing-pip-12 | True | 2 | 1 | True | +| missing-pip-13 | True | 2 | 1 | True | +| missing-pip-14 | True | 2 | 1 | True | +| missing-pip-15 | True | 2 | 1 | True | +| missing-pip-16 | True | 2 | 1 | True | +| missing-pip-17 | True | 2 | 1 | True | +| missing-pip-18 | True | 2 | 1 | True | +| missing-pip-19 | True | 2 | 1 | True | +| missing-pip-20 | True | 2 | 1 | True | +| missing-pip-21 | True | 2 | 1 | True | +| missing-pip-22 | True | 2 | 1 | True | +| missing-pip-23 | True | 2 | 1 | True | +| missing-pip-24 | True | 2 | 1 | True | +| missing-pip-25 | True | 2 | 1 | True | +| missing-pip-26 | True | 2 | 1 | True | +| missing-pip-27 | True | 2 | 1 | True | +| missing-pip-28 | True | 2 | 1 | True | +| missing-pip-29 | True | 2 | 1 | True | +| missing-pip-30 | True | 2 | 1 | True | +| missing-pip-31 | True | 2 | 1 | True | +| missing-pip-32 | True | 2 | 1 | True | +| missing-pip-33 | True | 2 | 1 | True | +| missing-pip-34 | True | 2 | 1 | True | +| missing-pip-35 | True | 2 | 1 | True | +| missing-pip-36 | True | 2 | 1 | True | +| missing-pip-37 | True | 2 | 1 | True | +| missing-pip-38 | True | 2 | 1 | True | +| missing-pip-39 | True | 2 | 1 | 
True | +| missing-pip-40 | True | 2 | 1 | True | +| assertion-01 | False | 2 | 2 | True | +| assertion-02 | False | 2 | 2 | True | +| assertion-03 | False | 2 | 2 | True | +| assertion-04 | False | 2 | 2 | True | +| assertion-05 | False | 2 | 2 | True | +| assertion-06 | False | 2 | 2 | True | +| assertion-07 | False | 2 | 2 | True | +| assertion-08 | False | 2 | 2 | True | +| assertion-09 | False | 2 | 2 | True | +| assertion-10 | False | 2 | 2 | True | diff --git a/bench/run_scratch_codegen.py b/bench/run_scratch_codegen.py new file mode 100644 index 0000000..7716e87 --- /dev/null +++ b/bench/run_scratch_codegen.py @@ -0,0 +1,485 @@ +"""Benchmark deterministic scratch codegen through the real executor path. + +This is intentionally keyless: it removes SIMPLICIO_MODEL while running so +fallbacks are reported as skipped instead of calling an LLM. The report is a +local evidence slice for the mechanical executors, not the full 50-run LLM +reduction release gate. +""" + +from __future__ import annotations + +import argparse +import json +import os +import platform +import sys +import tempfile +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from simplicio.scratch.executor import execute_plan # noqa: E402 +from simplicio.scratch.plan_schema import Plan, Task # noqa: E402 +from simplicio.scratch.stack_registry import Stack # noqa: E402 + + +RESULTS_JSON = ROOT / "bench" / "results_scratch_codegen.json" +RESULTS_MD = ROOT / "bench" / "results_scratch_codegen.md" + + +@dataclass(frozen=True) +class BenchCase: + name: str + stack_slug: str + language: str + framework: str + task: Task + seed_files: dict[str, str] + expected_executor: str + + +def build_cases(*, include_typescript: bool = True) -> list[BenchCase]: + cases = [ + BenchCase( + name="python-orm-field", + stack_slug="py-fastapi", + 
language="Python", + framework="FastAPI", + task=Task( + id="T01-db-model", + goal="Add email: Mapped[str] field to User model", + target="src/db/models.py", + criteria="- User has email: Mapped[str]", + constraints="- use SQLAlchemy 2.0 declarative style", + verify="pytest tests/db/test_models.py -q", + ), + seed_files={ + "src/db/models.py": """from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + pass + + +class User(Base): + __tablename__ = "users" + + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] +""" + }, + expected_executor="python-add-orm-field", + ), + BenchCase( + name="python-pydantic-schema", + stack_slug="py-fastapi", + language="Python", + framework="FastAPI", + task=Task( + id="T02-api-schemas", + goal="Create Pydantic schemas for User create, update, and read flows.", + target="src/api/schemas/user.py", + criteria=( + "- UserCreate, UserUpdate, and UserRead schemas exist\n" + "- optional update fields are supported" + ), + constraints="- keep schemas framework-agnostic", + verify="pytest tests/api/test_users.py -q", + ), + seed_files={ + "src/db/user.py": """from datetime import datetime + +from sqlalchemy import func +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + pass + + +class User(Base): + __tablename__ = "users" + + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] + email: Mapped[str | None] = mapped_column(nullable=True) + created_at: Mapped[datetime] = mapped_column(default=func.now()) +""" + }, + expected_executor="python-add-pydantic-schema", + ), + BenchCase( + name="python-fastapi-route", + stack_slug="py-fastapi", + language="Python", + framework="FastAPI", + task=Task( + id="T03-api-route", + goal="Add GET `/users/{id}` endpoint to the users route", + target="src/api/users.py", + criteria="- exposes @router.get with async handler and return type", + constraints="- keep existing imports", + 
verify="pytest tests/api/test_users.py -q", + ), + seed_files={ + "src/api/users.py": """from fastapi import APIRouter + +router = APIRouter() +""" + }, + expected_executor="python-add-fastapi-route", + ), + BenchCase( + name="python-pytest-test", + stack_slug="py-fastapi", + language="Python", + framework="FastAPI", + task=Task( + id="T04-pytest", + goal="Generate a happy-path pytest for function double in src/utils/math_ops.py", + target="tests/unit/test_math_ops.py", + criteria="- imports the function under test\n- has a sane assert", + constraints="- use pytest", + verify="pytest tests/unit/test_math_ops.py -q", + ), + seed_files={ + "pyproject.toml": """[tool.pytest.ini_options] +pythonpath = ["src"] +""", + "src/utils/math_ops.py": """def double(value: int) -> int: + return value * 2 +""", + }, + expected_executor="python-add-pytest-test", + ), + ] + + if include_typescript: + cases.append( + BenchCase( + name="typescript-next-route", + stack_slug="ts-nextjs", + language="TypeScript 5", + framework="Next.js app router", + task=Task( + id="T05-next-route", + goal="Create Next.js route handlers for Unit CRUD", + target="src/app/api/units/route.ts", + criteria="- exports GET and POST handlers\n- returns JSON responses", + constraints="- no external dependencies", + verify="pnpm vitest run src/app/api/units/route.test.ts", + ), + seed_files={}, + expected_executor="typescript-add-next-route", + ) + ) + + return cases + + +def run_benchmark( + *, + work_dir: Path | None = None, + repeat: int = 10, + include_typescript: bool = True, +) -> dict[str, Any]: + if repeat < 1: + raise ValueError("repeat must be >= 1") + + owned_temp = False + if work_dir is None: + work_dir = Path(tempfile.mkdtemp(prefix="simplicio-scratch-codegen-")) + owned_temp = True + work_dir.mkdir(parents=True, exist_ok=True) + + cases = build_cases(include_typescript=include_typescript) + projects_parent = work_dir / "projects" + templates_parent = work_dir / "templates" + 
projects_parent.mkdir(parents=True, exist_ok=True) + templates_parent.mkdir(parents=True, exist_ok=True) + + old_model = os.environ.pop("SIMPLICIO_MODEL", None) + rows: list[dict[str, Any]] = [] + t0 = time.perf_counter() + try: + for run_index in range(1, repeat + 1): + for case in cases: + rows.append( + _run_case( + case, + run_index=run_index, + projects_parent=projects_parent, + templates_parent=templates_parent, + ) + ) + finally: + if old_model is not None: + os.environ["SIMPLICIO_MODEL"] = old_model + + elapsed_s = round(time.perf_counter() - t0, 3) + return { + "benchmark": "scratch-codegen", + "scope": ( + "synthetic deterministic executor benchmark; no LLM calls; " + "does not replace the full 50-scratch release gate" + ), + "work_dir": "$WORK_DIR", + "work_dir_owned_by_runner": owned_temp, + "repeat": repeat, + "include_typescript": include_typescript, + "environment": { + "python": sys.version.split()[0], + "platform": platform.platform(), + }, + "summary": _summarize(rows, elapsed_s), + "cases": rows, + } + + +def _run_case( + case: BenchCase, + *, + run_index: int, + projects_parent: Path, + templates_parent: Path, +) -> dict[str, Any]: + project_name = f"{case.name}-r{run_index:02d}" + template_root = templates_parent / project_name + _write_seed_tree(template_root / "tree", case.seed_files) + stack = Stack( + slug=case.stack_slug, + path=template_root, + meta={ + "language": case.language, + "framework": case.framework, + "template_version": "bench-scratch-codegen-v1", + }, + ) + plan = _plan_for_case(case, project_name) + + try: + report = execute_plan(plan, stack, projects_parent, skip_install=True) + except Exception as exc: # pragma: no cover - defensive bench reporting + return { + "name": case.name, + "run_index": run_index, + "stack": case.stack_slug, + "expected_executor": case.expected_executor, + "passed": False, + "error": f"{type(exc).__name__}: {exc}", + } + + task = report.task_results[0] if report.task_results else None + executor = 
task.codegen_executor if task is not None else None + expected_match = executor == case.expected_executor + task_passed = bool(task and task.passed) + work_dir = projects_parent.parent + return { + "name": case.name, + "run_index": run_index, + "stack": case.stack_slug, + "project_dir": _redact_path(report.project_dir, work_dir), + "expected_executor": case.expected_executor, + "actual_executor": executor, + "expected_executor_match": expected_match, + "passed": task_passed and expected_match, + "task_passed": task_passed, + "execution_mode": task.execution_mode if task is not None else "missing", + "duration_ms": task.duration_ms if task is not None else 0, + "metrics": report.metrics, + "log_tail": _redact_text(task.log_tail[-300:], work_dir) + if task is not None + else "", + } + + +def _write_seed_tree(tree: Path, seed_files: dict[str, str]) -> None: + for rel_path, content in seed_files.items(): + path = tree / rel_path + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + + +def _plan_for_case(case: BenchCase, project_name: str) -> Plan: + return Plan( + version="1.0", + stack=case.stack_slug, + project_name=project_name, + rationale=f"Benchmark deterministic executor {case.expected_executor}.", + files_to_create=[], + tasks=[case.task], + deps_to_install=[], + deps_dev=[], + test_command=case.task.verify, + lint_command="", + estimated_total_tasks=1, + ) + + +def _summarize(rows: list[dict[str, Any]], elapsed_s: float) -> dict[str, Any]: + total_cases = len(rows) + passed_cases = sum(1 for row in rows if row.get("passed")) + codegen_tasks = sum( + int(row.get("metrics", {}).get("tasks_codegen", 0)) for row in rows + ) + llm_tasks = sum(int(row.get("metrics", {}).get("tasks_llm", 0)) for row in rows) + skipped_tasks = sum( + int(row.get("metrics", {}).get("tasks_skipped", 0)) for row in rows + ) + failed_tasks = sum( + int(row.get("metrics", {}).get("tasks_failed", 0)) for row in rows + ) + total_tasks = codegen_tasks 
+ llm_tasks + skipped_tasks + failed_tasks + matched = sum(1 for row in rows if row.get("expected_executor_match")) + codegen_durations = [ + int(row.get("duration_ms", 0)) + for row in rows + if row.get("execution_mode") == "codegen" + ] + summary = { + "total_cases": total_cases, + "passed_cases": passed_cases, + "failed_cases": total_cases - passed_cases, + "pass_rate": _ratio(passed_cases, total_cases), + "expected_executor_match_rate": _ratio(matched, total_cases), + "total_tasks": total_tasks, + "tasks_codegen": codegen_tasks, + "tasks_llm": llm_tasks, + "tasks_skipped": skipped_tasks, + "tasks_failed": failed_tasks, + "codegen_share": _ratio(codegen_tasks, total_tasks), + "avg_codegen_ms": _avg(codegen_durations), + "elapsed_s": elapsed_s, + "planner_calls": 0, + "llm_calls": llm_tasks, + } + summary["release_gates"] = { + "fifty_runs": total_cases >= 50, + "mechanical_share_ge_30": summary["codegen_share"] >= 0.30, + "executor_pass_rate_100": summary["pass_rate"] == 1.0, + "llm_baseline_present": False, + "latency_reduction_ge_50": None, + } + summary["missing_release_evidence"] = [ + "LLM baseline pass-rate and latency comparison", + "50 real scratch goals across the release corpus", + "planner cache hit-rate measured across cold/warm scratch runs", + ] + return summary + + +def _ratio(numerator: int, denominator: int) -> float: + return round(numerator / denominator, 4) if denominator else 0.0 + + +def _avg(values: list[int]) -> int: + return round(sum(values) / len(values)) if values else 0 + + +def _redact_path(path: Path, root: Path) -> str: + try: + rel = path.relative_to(root) + except ValueError: + return _redact_text(str(path), root) + return "$WORK_DIR/" + rel.as_posix() + + +def _redact_text(text: str, root: Path) -> str: + root_text = str(root) + return text.replace(root_text, "$WORK_DIR").replace( + root_text.replace("\\", "/"), + "$WORK_DIR", + ) + + +def write_reports(result: dict[str, Any], json_path: Path, md_path: Path) -> None: + 
json_path.parent.mkdir(parents=True, exist_ok=True) + md_path.parent.mkdir(parents=True, exist_ok=True) + json_path.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8") + md_path.write_text(_to_markdown(result), encoding="utf-8") + + +def _to_markdown(result: dict[str, Any]) -> str: + summary = result["summary"] + lines = [ + "# Scratch Codegen Benchmark", + "", + result["scope"], + "", + "## Summary", + "", + f"- cases: {summary['passed_cases']}/{summary['total_cases']} passed", + f"- codegen share: {summary['codegen_share']:.2%}", + f"- expected executor match: {summary['expected_executor_match_rate']:.2%}", + f"- avg codegen latency: {summary['avg_codegen_ms']} ms", + f"- planner calls: {summary['planner_calls']}", + f"- llm calls: {summary['llm_calls']}", + "", + "## Release Gate Status", + "", + ] + for gate, value in summary["release_gates"].items(): + lines.append(f"- {gate}: {value}") + lines.extend( + [ + "", + "## Cases", + "", + "| case | stack | executor | mode | passed | duration_ms |", + "| --- | --- | --- | --- | --- | ---: |", + ] + ) + for row in result["cases"]: + lines.append( + "| {name} r{run_index:02d} | {stack} | {executor} | {mode} | {passed} | {duration} |".format( + name=row["name"], + run_index=row["run_index"], + stack=row["stack"], + executor=row.get("actual_executor") or "", + mode=row.get("execution_mode") or "", + passed=row.get("passed"), + duration=row.get("duration_ms", 0), + ) + ) + lines.append("") + return "\n".join(lines) + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--repeat", type=int, default=10) + parser.add_argument("--work-dir", type=Path) + parser.add_argument("--json-output", type=Path, default=RESULTS_JSON) + parser.add_argument("--md-output", type=Path, default=RESULTS_MD) + parser.add_argument( + "--no-typescript", + action="store_true", + help="Skip the Next.js ts-morph executor case.", + 
) + parser.add_argument("--quiet", action="store_true") + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv) + result = run_benchmark( + work_dir=args.work_dir, + repeat=args.repeat, + include_typescript=not args.no_typescript, + ) + write_reports(result, args.json_output, args.md_output) + if not args.quiet: + print(json.dumps(result["summary"], indent=2, sort_keys=True)) + print(f"wrote {args.json_output}") + print(f"wrote {args.md_output}") + return 0 if result["summary"]["failed_cases"] == 0 else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/bench/run_static_fixers.py b/bench/run_static_fixers.py new file mode 100644 index 0000000..073b0ba --- /dev/null +++ b/bench/run_static_fixers.py @@ -0,0 +1,313 @@ +"""Benchmark static verify-loop fixers with a synthetic retry corpus. + +The runner exercises the real pipeline retry loop and real static fixer +dispatch while replacing external package installs and LLM generation with +deterministic fakes. It proves the measurement path for lever C, not the full +50-real-scratch release gate. 
+""" + +from __future__ import annotations + +import argparse +import json +import platform +import subprocess +import sys +import tempfile +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from simplicio import pipeline # noqa: E402 +from simplicio.pipeline_fixers import FixerResult # noqa: E402 +from simplicio.pipeline_fixers import try_static_fixers as real_try_static_fixers # noqa: E402 + + +RESULTS_JSON = ROOT / "bench" / "results_static_fixers.json" +RESULTS_MD = ROOT / "bench" / "results_static_fixers.md" + + +@dataclass(frozen=True) +class FixerCase: + name: str + failure_log: str + resolvable: bool + + +def build_cases() -> list[FixerCase]: + cases: list[FixerCase] = [] + for idx in range(1, 41): + cases.append( + FixerCase( + name=f"missing-pip-{idx:02d}", + failure_log="ModuleNotFoundError: No module named 'fastapi'", + resolvable=True, + ) + ) + for idx in range(1, 11): + cases.append( + FixerCase( + name=f"assertion-{idx:02d}", + failure_log="AssertionError: expected 200 got 500", + resolvable=False, + ) + ) + return cases + + +def run_benchmark(*, work_dir: Path | None = None) -> dict[str, Any]: + owned_temp = False + if work_dir is None: + work_dir = Path(tempfile.mkdtemp(prefix="simplicio-static-fixers-")) + owned_temp = True + work_dir.mkdir(parents=True, exist_ok=True) + + old_generate = pipeline.generate + old_build_prompt = pipeline.build_prompt + old_apply_and_test = pipeline._apply_and_test + old_try_static_fixers = pipeline.try_static_fixers + + rows: list[dict[str, Any]] = [] + t0 = time.perf_counter() + try: + for case in build_cases(): + baseline = _run_case(case, work_dir / "baseline" / case.name, False) + with_fixer = _run_case(case, work_dir / "with-fixer" / case.name, True) + rows.append(_row(case, baseline, with_fixer)) + finally: + pipeline.generate = old_generate + 
pipeline.build_prompt = old_build_prompt + pipeline._apply_and_test = old_apply_and_test + pipeline.try_static_fixers = old_try_static_fixers + + elapsed_s = round(time.perf_counter() - t0, 3) + return { + "benchmark": "static-fixers", + "scope": ( + "synthetic verify-loop fixer benchmark; package installs and LLM " + "generation are faked; does not replace the full 50-scratch gate" + ), + "work_dir": "$WORK_DIR", + "work_dir_owned_by_runner": owned_temp, + "environment": { + "python": sys.version.split()[0], + "platform": platform.platform(), + }, + "summary": _summarize(rows, elapsed_s), + "cases": rows, + } + + +def _run_case(case: FixerCase, root: Path, fixer_enabled: bool) -> dict[str, Any]: + root.mkdir(parents=True, exist_ok=True) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\ndependencies = []\n', + encoding="utf-8", + ) + + generate_calls: list[str | None] = [] + fixer_calls: list[FixerResult] = [] + + def fake_generate(prompt: str, feedback: str | None = None) -> str: + generate_calls.append(feedback) + attempt = "SECOND_ATTEMPT" if len(generate_calls) > 1 else "FIRST_ATTEMPT" + return _valid_pipeline_diff(attempt) + + def fake_apply_and_test( + output: str, + run_root: str, + bound_paths: list[str] | None = None, + ) -> tuple[bool, str]: + text = output or "" + pyproject = Path(run_root) / "pyproject.toml" + if "SECOND_ATTEMPT" in text: + return True, "1 passed after LLM retry" + if case.resolvable and '"fastapi"' in pyproject.read_text(encoding="utf-8"): + return True, "1 passed after static fixer" + return False, case.failure_log + + def fake_runner( + argv: list[str], + **kwargs: Any, + ) -> subprocess.CompletedProcess[str]: + return subprocess.CompletedProcess(argv, 0, "", "") + + def patched_fixers(log: str, run_root: str | Path) -> FixerResult: + if not fixer_enabled: + result = FixerResult("none", False, "disabled synthetic baseline") + else: + result = real_try_static_fixers(log, run_root, runner=fake_runner) + 
fixer_calls.append(result) + return result + + pipeline.generate = fake_generate + pipeline.build_prompt = lambda *args, **kwargs: "prompt" + pipeline._apply_and_test = fake_apply_and_test + pipeline.try_static_fixers = patched_fixers + + result = pipeline.run_task( + str(root), + "python", + "add api", + "src/app.py", + "- passes", + "- small", + quiet=True, + ) + + return { + "applied": result["applied"], + "llm_calls": len(generate_calls), + "fixer_calls": len(fixer_calls), + "fixer_applied": any(item.applied for item in fixer_calls), + "fixers": [item.fixer for item in fixer_calls], + } + + +def _row( + case: FixerCase, + baseline: dict[str, Any], + with_fixer: dict[str, Any], +) -> dict[str, Any]: + return { + "name": case.name, + "resolvable": case.resolvable, + "baseline_llm_calls": baseline["llm_calls"], + "with_fixer_llm_calls": with_fixer["llm_calls"], + "retry_calls_saved": baseline["llm_calls"] - with_fixer["llm_calls"], + "fixed_before_llm_retry": bool( + with_fixer["fixer_applied"] and with_fixer["llm_calls"] == 1 + ), + "fixers": with_fixer["fixers"], + "passed": baseline["applied"] and with_fixer["applied"], + } + + +def _summarize(rows: list[dict[str, Any]], elapsed_s: float) -> dict[str, Any]: + total = len(rows) + passed = sum(1 for row in rows if row["passed"]) + fixed = sum(1 for row in rows if row["fixed_before_llm_retry"]) + baseline_calls = sum(int(row["baseline_llm_calls"]) for row in rows) + with_fixer_calls = sum(int(row["with_fixer_llm_calls"]) for row in rows) + reduction = ( + (baseline_calls - with_fixer_calls) / baseline_calls if baseline_calls else 0.0 + ) + return { + "total_cases": total, + "passed_cases": passed, + "fixer_resolved_before_retry": fixed, + "fixer_resolved_rate": round(fixed / total, 4) if total else 0.0, + "baseline_llm_calls": baseline_calls, + "with_fixer_llm_calls": with_fixer_calls, + "retry_call_reduction": round(reduction, 4), + "elapsed_s": elapsed_s, + "release_gates": { + "fifty_cases": total >= 50, + 
"fixer_resolved_ge_80": fixed / total >= 0.80 if total else False, + "retry_calls_down_ge_30": reduction >= 0.30, + "real_scratch_corpus": False, + }, + "missing_release_evidence": [ + "real install/import/lint failures from 50 scratch runs", + "non-faked package manager execution", + "comparison across actual scratch reports", + ], + } + + +def _valid_pipeline_diff(marker: str) -> str: + return "\n".join( + [ + "diff --git a/src/app.py b/src/app.py", + "--- a/src/app.py", + "+++ b/src/app.py", + "@@ -0,0 +1 @@", + f"+print('{marker}')", + "", + "TEST: pytest -q", + ] + ) + + +def write_reports(result: dict[str, Any], json_path: Path, md_path: Path) -> None: + json_path.parent.mkdir(parents=True, exist_ok=True) + md_path.parent.mkdir(parents=True, exist_ok=True) + json_path.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8") + md_path.write_text(_to_markdown(result), encoding="utf-8") + + +def _to_markdown(result: dict[str, Any]) -> str: + summary = result["summary"] + lines = [ + "# Static Fixers Benchmark", + "", + result["scope"], + "", + "## Summary", + "", + f"- cases: {summary['passed_cases']}/{summary['total_cases']} passed", + f"- fixed before LLM retry: {summary['fixer_resolved_rate']:.2%}", + f"- baseline LLM calls: {summary['baseline_llm_calls']}", + f"- with-fixer LLM calls: {summary['with_fixer_llm_calls']}", + f"- retry-call reduction: {summary['retry_call_reduction']:.2%}", + "", + "## Release Gate Status", + "", + ] + for gate, value in summary["release_gates"].items(): + lines.append(f"- {gate}: {value}") + lines.extend( + [ + "", + "## Cases", + "", + "| case | fixed_before_retry | baseline_calls | with_fixer_calls | passed |", + "| --- | --- | ---: | ---: | --- |", + ] + ) + for row in result["cases"]: + lines.append( + "| {name} | {fixed} | {baseline} | {with_fixer} | {passed} |".format( + name=row["name"], + fixed=row["fixed_before_llm_retry"], + baseline=row["baseline_llm_calls"], + 
with_fixer=row["with_fixer_llm_calls"], + passed=row["passed"], + ) + ) + lines.append("") + return "\n".join(lines) + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--work-dir", type=Path) + parser.add_argument("--json-output", type=Path, default=RESULTS_JSON) + parser.add_argument("--md-output", type=Path, default=RESULTS_MD) + parser.add_argument("--quiet", action="store_true") + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv) + result = run_benchmark(work_dir=args.work_dir) + write_reports(result, args.json_output, args.md_output) + if not args.quiet: + print(json.dumps(result["summary"], indent=2, sort_keys=True)) + print(f"wrote {args.json_output}") + print(f"wrote {args.md_output}") + return ( + 0 + if result["summary"]["passed_cases"] == result["summary"]["total_cases"] + else 1 + ) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index 79bb5d5..71fda60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,15 @@ where = ["."] include = ["simplicio*"] [tool.setuptools.package-data] -simplicio = ["templates/*.md", "templates/*.sh"] +simplicio = [ + "templates/*.md", + "templates/*.sh", + "templates/recipes/**/*.json", + "templates/recipes/**/*.yaml", + "templates/recipes/**/*.yml", + "templates/stacks/**/*", + "templates/stacks/*/tree/.gitignore", +] [tool.pytest.ini_options] pythonpath = ["."] diff --git a/simplicio/_cache.py b/simplicio/_cache.py index 292e47e..744b55b 100644 --- a/simplicio/_cache.py +++ b/simplicio/_cache.py @@ -87,11 +87,16 @@ def __init__( ) -> None: self.root = Path(root) if root is not None else _cache_root() self.ttl_days = ( - ttl_days if ttl_days is not None else _env_float("SIMPLICIO_CACHE_TTL_DAYS", 30) + ttl_days + if ttl_days is not None + else _env_float("SIMPLICIO_CACHE_TTL_DAYS", 30) ) self.max_mb = 
( max_mb if max_mb is not None else _env_float("SIMPLICIO_CACHE_MAX_MB", 500) ) + self.hits = 0 + self.misses = 0 + self.puts = 0 @property def enabled(self) -> bool: @@ -106,21 +111,28 @@ def path_for(self, key: str) -> Path: def get(self, key: str) -> Optional[CacheEntry]: if not self.enabled or self.bust: + self.misses += 1 return None path = self.path_for(key) try: if not path.exists(): + self.misses += 1 return None if self._is_expired(path): self._safe_unlink(path) + self.misses += 1 return None except OSError: + self.misses += 1 return None try: with path.open("r", encoding="utf-8") as handle: - return CacheEntry.from_dict(json.load(handle)) + entry = CacheEntry.from_dict(json.load(handle)) + self.hits += 1 + return entry except (OSError, ValueError, TypeError): self._safe_unlink(path) + self.misses += 1 return None def put(self, key: str, entry: CacheEntry) -> None: @@ -146,6 +158,7 @@ def put(self, key: str, entry: CacheEntry) -> None: finally: if os.path.exists(tmp_name): self._safe_unlink(Path(tmp_name)) + self.puts += 1 self._evict_if_needed() def clear(self) -> int: @@ -166,6 +179,12 @@ def stats(self) -> Dict[str, Any]: "bust": self.bust, "root": str(self.root), "entries": len(files), + "hits": self.hits, + "misses": self.misses, + "puts": self.puts, + "hit_rate": round(self.hits / (self.hits + self.misses), 4) + if self.hits + self.misses + else 0.0, "bytes": total_bytes, "mb": round(total_bytes / (1024 * 1024), 3), "oldest_age_s": round(oldest, 3) if oldest is not None else None, diff --git a/simplicio/providers.py b/simplicio/providers.py index 18e57e1..33fb4b8 100644 --- a/simplicio/providers.py +++ b/simplicio/providers.py @@ -21,6 +21,7 @@ given SIMPLICIO_HOOK_GUARD=1 so the inner CLI does not re-trigger the simplicio UserPromptSubmit hook (recursion guard). 
""" + import os from ._cache import CacheEntry, cache, make_key @@ -29,18 +30,22 @@ def _cfg(): return { "model": os.environ.get("SIMPLICIO_MODEL"), - "base": os.environ.get("SIMPLICIO_BASE_URL"), - "key": os.environ.get("SIMPLICIO_API_KEY") - or os.environ.get("OPENROUTER_API_KEY") - or os.environ.get("ANTHROPIC_API_KEY"), + "base": os.environ.get("SIMPLICIO_BASE_URL"), + "key": os.environ.get("SIMPLICIO_API_KEY") + or os.environ.get("OPENROUTER_API_KEY") + or os.environ.get("ANTHROPIC_API_KEY"), } def _msgs(prompt, feedback): m = [{"role": "user", "content": prompt}] if feedback: - m.append({"role": "user", - "content": f"The test FAILED:\n{feedback}\nFix it. Same output format."}) + m.append( + { + "role": "user", + "content": f"The test FAILED:\n{feedback}\nFix it. Same output format.", + } + ) return m @@ -68,11 +73,16 @@ def _shell_out(cmd, label): re-run the first-run bootstrap. """ import subprocess + env = {**os.environ, "SIMPLICIO_HOOK_GUARD": "1", "SIMPLICIO_SKIP_AUTO_INIT": "1"} try: result = subprocess.run( - cmd, env=env, capture_output=True, text=True, - timeout=600, check=False, + cmd, + env=env, + capture_output=True, + text=True, + timeout=600, + check=False, ) except FileNotFoundError: raise SystemExit( @@ -84,8 +94,7 @@ def _shell_out(cmd, label): if result.returncode != 0: stderr = (result.stderr or "").strip() raise SystemExit( - f"simplicio: {label} failed (exit {result.returncode}): " - f"{stderr[:500]}" + f"simplicio: {label} failed (exit {result.returncode}): {stderr[:500]}" ) return result.stdout @@ -127,11 +136,15 @@ def generate(prompt, feedback=None, max_tokens=4000): # Path 3: shell out to a logged-in CLI. No API key needed. 
if model.startswith("claude-cli/"): - out = _shell_out_claude(_inline_feedback(prompt, feedback), model.split("/", 1)[1]) + out = _shell_out_claude( + _inline_feedback(prompt, feedback), model.split("/", 1)[1] + ) cache().put(key, CacheEntry(out, provider_id=provider_id, model=model)) return out if model.startswith("codex-cli/"): - out = _shell_out_codex(_inline_feedback(prompt, feedback), model.split("/", 1)[1]) + out = _shell_out_codex( + _inline_feedback(prompt, feedback), model.split("/", 1)[1] + ) cache().put(key, CacheEntry(out, provider_id=provider_id, model=model)) return out @@ -145,18 +158,22 @@ def generate(prompt, feedback=None, max_tokens=4000): # Native Anthropic path: no base_url if not c["base"]: import anthropic + cli = anthropic.Anthropic(api_key=c["key"]) - r = cli.messages.create(model=model, max_tokens=max_tokens, - messages=_msgs(prompt, feedback)) + r = cli.messages.create( + model=model, max_tokens=max_tokens, messages=_msgs(prompt, feedback) + ) out = next((b.text for b in r.content if b.type == "text"), "") cache().put(key, CacheEntry(out, provider_id=provider_id, model=model)) return out # Any OpenAI-compatible endpoint (OpenRouter, GLM, DeepSeek, local...) 
from openai import OpenAI + cli = OpenAI(base_url=c["base"], api_key=c["key"]) - r = cli.chat.completions.create(model=model, max_tokens=max_tokens, - messages=_msgs(prompt, feedback)) + r = cli.chat.completions.create( + model=model, max_tokens=max_tokens, messages=_msgs(prompt, feedback) + ) out = r.choices[0].message.content cache().put(key, CacheEntry(out, provider_id=provider_id, model=model)) return out @@ -169,8 +186,10 @@ def info(): return f"model={model} provider=claude-cli (shell-out, uses Claude Code OAuth) key=not-needed" if model.startswith("codex-cli/"): return f"model={model} provider=codex-cli (shell-out, uses Codex/ChatGPT login) key=not-needed" - return (f"model={model} base={c['base'] or 'anthropic-native'} " - f"key={'set' if c['key'] else 'MISSING'}") + return ( + f"model={model} base={c['base'] or 'anthropic-native'} " + f"key={'set' if c['key'] else 'MISSING'}" + ) # --------------------------------------------------------------------------- # @@ -198,11 +217,11 @@ def info(): # planner when the user already has an HF account. "deepseek-hf": ("https://router.huggingface.co/v1", "HF_TOKEN"), # DeepSeek's own API (paid, no HF middleman). Pin via `deepseek/`. - "deepseek": ("https://api.deepseek.com/v1", "DEEPSEEK_API_KEY"), - "openai": ("https://api.openai.com/v1", "OPENAI_API_KEY"), - "openrouter": ("https://openrouter.ai/api/v1", "OPENROUTER_API_KEY"), + "deepseek": ("https://api.deepseek.com/v1", "DEEPSEEK_API_KEY"), + "openai": ("https://api.openai.com/v1", "OPENAI_API_KEY"), + "openrouter": ("https://openrouter.ai/api/v1", "OPENROUTER_API_KEY"), # Generic HF route for any non-DeepSeek model on the HF router (Qwen, Llama, ...). - "hf": ("https://router.huggingface.co/v1", "HF_TOKEN"), + "hf": ("https://router.huggingface.co/v1", "HF_TOKEN"), } # Default planner. 
DeepSeek-V3.1 on HF is the current "frontier model with a @@ -212,7 +231,7 @@ def info(): _DEFAULT_PLANNER = "deepseek-hf/deepseek-ai/DeepSeek-V3.1" -def planner_cfg(): +def planner_cfg(require_key=True): """Resolve the planner provider config without touching the doer config. Returns a dict with keys: model, base, key, native_anthropic, shell_out. @@ -223,8 +242,13 @@ def planner_cfg(): raw = _DEFAULT_PLANNER if raw.startswith("claude-cli/") or raw.startswith("codex-cli/"): - return {"model": raw, "base": None, "key": None, - "native_anthropic": False, "shell_out": True} + return { + "model": raw, + "base": None, + "key": None, + "native_anthropic": False, + "shell_out": True, + } if "/" in raw: prefix, name = raw.split("/", 1) @@ -233,61 +257,132 @@ def planner_cfg(): if prefix == "anthropic": key = os.environ.get("ANTHROPIC_API_KEY") - if not key: - raise SystemExit( - "SIMPLICIO_PLANNER=anthropic/* requires ANTHROPIC_API_KEY") - return {"model": name, "base": None, "key": key, - "native_anthropic": True, "shell_out": False} + if not key and require_key: + raise SystemExit("SIMPLICIO_PLANNER=anthropic/* requires ANTHROPIC_API_KEY") + return { + "model": name, + "base": None, + "key": key, + "native_anthropic": True, + "shell_out": False, + } if prefix in _PLANNER_ROUTES: base, env_key = _PLANNER_ROUTES[prefix] key = os.environ.get(env_key) - if not key: - raise SystemExit( - f"SIMPLICIO_PLANNER={raw} requires {env_key}") - return {"model": name, "base": base, "key": key, - "native_anthropic": False, "shell_out": False} + if not key and require_key: + raise SystemExit(f"SIMPLICIO_PLANNER={raw} requires {env_key}") + return { + "model": name, + "base": base, + "key": key, + "native_anthropic": False, + "shell_out": False, + } # Bare model name — fall back to the same provider config the doer uses. # Lets the user run planner against whatever they already configured. 
c = _cfg() - return {"model": raw, "base": c["base"], "key": c["key"], - "native_anthropic": not c["base"], "shell_out": False} + return { + "model": raw, + "base": c["base"], + "key": c["key"], + "native_anthropic": not c["base"], + "shell_out": False, + } -def planner_complete(prompt, max_tokens=8192, temperature=0.1): +def _planner_provider_id(cfg): + model = cfg["model"] + if model.startswith("claude-cli/"): + return "planner:claude-cli" + if model.startswith("codex-cli/"): + return "planner:codex-cli" + if cfg["native_anthropic"]: + return "planner:anthropic-native" + if cfg["base"]: + return f"planner:openai-compatible:{cfg['base'].rstrip('/')}" + return "planner:unknown" + + +def _planner_cache_key(cfg, prompt, max_tokens, temperature, template_version): + return make_key( + _planner_provider_id(cfg), + cfg["model"], + prompt, + max_tokens=max_tokens, + temperature=temperature, + template_version=template_version, + ) + + +def planner_complete(prompt, max_tokens=8192, temperature=0.1, template_version=None): """Call the planner provider. Used by simplicio.scratch.planner. temperature defaults to 0.1 because plans must be reproducible and schema-stable, not creative. 
""" - p = planner_cfg() + p = planner_cfg(require_key=False) + key = _planner_cache_key(p, prompt, max_tokens, temperature, template_version) + cached = cache().get(key) + if cached is not None: + return cached.completion if p["shell_out"]: + provider_id = _planner_provider_id(p) if p["model"].startswith("claude-cli/"): - return _shell_out_claude(prompt, p["model"].split("/", 1)[1]) + out = _shell_out_claude(prompt, p["model"].split("/", 1)[1]) + cache().put( + key, + CacheEntry(out, provider_id=provider_id, model=p["model"]), + ) + return out if p["model"].startswith("codex-cli/"): - return _shell_out_codex(prompt, p["model"].split("/", 1)[1]) + out = _shell_out_codex(prompt, p["model"].split("/", 1)[1]) + cache().put( + key, + CacheEntry(out, provider_id=provider_id, model=p["model"]), + ) + return out + + if not p["key"]: + raise SystemExit( + "no planner credentials: set SIMPLICIO_PLANNER + matching API key " + "(default planner is deepseek-hf/deepseek-ai/DeepSeek-V3.1 -> HF_TOKEN)" + ) if p["native_anthropic"]: import anthropic + cli = anthropic.Anthropic(api_key=p["key"]) r = cli.messages.create( - model=p["model"], max_tokens=max_tokens, temperature=temperature, - messages=[{"role": "user", "content": prompt}]) - return next((b.text for b in r.content if b.type == "text"), "") - - if not p["key"]: - raise SystemExit( - "no planner credentials: set SIMPLICIO_PLANNER + matching API key " - "(default planner is deepseek/deepseek-v4-pro -> DEEPSEEK_API_KEY)") + model=p["model"], + max_tokens=max_tokens, + temperature=temperature, + messages=[{"role": "user", "content": prompt}], + ) + out = next((b.text for b in r.content if b.type == "text"), "") + cache().put( + key, + CacheEntry(out, provider_id=_planner_provider_id(p), model=p["model"]), + ) + return out from openai import OpenAI + cli = OpenAI(base_url=p["base"], api_key=p["key"]) r = cli.chat.completions.create( - model=p["model"], max_tokens=max_tokens, temperature=temperature, - messages=[{"role": 
"user", "content": prompt}]) - return r.choices[0].message.content + model=p["model"], + max_tokens=max_tokens, + temperature=temperature, + messages=[{"role": "user", "content": prompt}], + ) + out = r.choices[0].message.content + cache().put( + key, + CacheEntry(out, provider_id=_planner_provider_id(p), model=p["model"]), + ) + return out def planner_info(): @@ -296,5 +391,6 @@ def planner_info(): return f"planner={p['model']} (shell-out)" if p["native_anthropic"]: return f"planner={p['model']} provider=anthropic-native key={'set' if p['key'] else 'MISSING'}" - return (f"planner={p['model']} base={p['base']} " - f"key={'set' if p['key'] else 'MISSING'}") + return ( + f"planner={p['model']} base={p['base']} key={'set' if p['key'] else 'MISSING'}" + ) diff --git a/simplicio/scratch/executor.py b/simplicio/scratch/executor.py index 51e6e1c..5921e84 100644 --- a/simplicio/scratch/executor.py +++ b/simplicio/scratch/executor.py @@ -39,6 +39,7 @@ class TaskResult: skipped_reason: Optional[str] = None duration_ms: int = 0 log_tail: str = "" + generated_skill: Optional[str] = None @dataclass @@ -97,6 +98,7 @@ def to_dict(self) -> dict: "skipped": t.skipped_reason, "duration_ms": t.duration_ms, "log_tail": t.log_tail[-400:], + "generated_skill": t.generated_skill, } for t in self.task_results ], @@ -161,12 +163,30 @@ def _execute_one_task(task: Task, project_dir: Path, stack: Stack) -> TaskResult simplicio.pipeline via the adapter.""" t0 = time.perf_counter() codegen_log = "" + skill_log, generated_skill = _ensure_required_skill(task, project_dir) + if skill_log.startswith("skill-opt failed:"): + ms = int((time.perf_counter() - t0) * 1000) + return TaskResult( + id=task.id, + target=task.target, + passed=False, + duration_ms=ms, + execution_mode="failed", + skipped_reason="required skill generation failed", + log_tail=skill_log, + ) codegen_result = try_execute(task, project_dir, stack) if codegen_result is not None: codegen_log = codegen_result.log if 
codegen_result.passed or not codegen_result.fallback_to_llm: - return _task_result_from_codegen(task, t0, codegen_result) + return _task_result_from_codegen( + task, + t0, + codegen_result, + skill_log=skill_log, + generated_skill=generated_skill, + ) if not os.environ.get("SIMPLICIO_MODEL"): # smoke-test mode: log the task but mark as skipped (no LLM call made) @@ -181,7 +201,8 @@ def _execute_one_task(task: Task, project_dir: Path, stack: Stack) -> TaskResult duration_ms=ms, execution_mode="skipped", skipped_reason="no SIMPLICIO_MODEL set; task generation skipped", - log_tail=f"{fallback_note}goal={task.goal[:200]}", + log_tail=f"{skill_log}{fallback_note}goal={task.goal[:200]}", + generated_skill=generated_skill, ) try: @@ -195,11 +216,15 @@ def _execute_one_task(task: Task, project_dir: Path, stack: Stack) -> TaskResult duration_ms=ms, execution_mode="failed", skipped_reason=f"adapter import failed: {e}", + log_tail=skill_log, + generated_skill=generated_skill, ) passed, log = run_task(task, project_dir, stack) if codegen_log: log = f"codegen fallback: {codegen_log}\n\n{log}" + if skill_log: + log = f"{skill_log}{log}" ms = int((time.perf_counter() - t0) * 1000) return TaskResult( id=task.id, @@ -208,11 +233,17 @@ def _execute_one_task(task: Task, project_dir: Path, stack: Stack) -> TaskResult execution_mode="llm" if passed else "failed", duration_ms=ms, log_tail=log, + generated_skill=generated_skill, ) def _task_result_from_codegen( - task: Task, started_at: float, result: CodegenResult + task: Task, + started_at: float, + result: CodegenResult, + *, + skill_log: str = "", + generated_skill: Optional[str] = None, ) -> TaskResult: ms = int((time.perf_counter() - started_at) * 1000) files = ", ".join(str(path) for path in result.files_modified) @@ -225,10 +256,37 @@ def _task_result_from_codegen( codegen_executor=result.executor_name, files_modified=[str(path) for path in result.files_modified], duration_ms=ms, - log_tail=f"{result.log}{suffix}".strip(), + 
log_tail=f"{skill_log}{result.log}{suffix}".strip(), + generated_skill=generated_skill, ) +def _ensure_required_skill( + task: Task, + project_dir: Path, +) -> tuple[str, Optional[str]]: + required = (task.required_skill or "").strip() + if not required: + return "", None + + from . import skill_opt + + skills_root = project_dir / ".skills" + try: + slug, markdown = skill_opt.generate_skill_doc( + required, + skills_root=skills_root, + ) + path = skill_opt.install_skill(slug, markdown, skills_root=skills_root) + except skill_opt.SkillOptError as exc: + return f"skill-opt failed: {exc}", None + except SystemExit as exc: + return f"skill-opt failed: {exc}", None + + rel = path.relative_to(project_dir).as_posix() + return f"skill-opt generated {rel} with review_required=true\n", rel + + def _avg_ms(tasks: list[TaskResult]) -> int: if not tasks: return 0 @@ -282,6 +340,11 @@ def execute_plan( "constraints": t.constraints, "verify": t.verify, "depends_on": t.depends_on, + **( + {"required_skill": t.required_skill} + if t.required_skill + else {} + ), } for t in plan.tasks ], diff --git a/simplicio/scratch/plan_schema.py b/simplicio/scratch/plan_schema.py index 5a0cb9e..bfb88da 100644 --- a/simplicio/scratch/plan_schema.py +++ b/simplicio/scratch/plan_schema.py @@ -14,7 +14,7 @@ import re from dataclasses import dataclass, field -from typing import Any +from typing import Any, Optional SCHEMA_VERSION = "1.0" @@ -29,6 +29,7 @@ class Task: constraints: str verify: str depends_on: list[str] = field(default_factory=list) + required_skill: Optional[str] = None @dataclass @@ -144,6 +145,11 @@ def validate_plan(raw: dict) -> Plan: v = _need(tr, "verify", str, errors, path) or "" d = tr.get("depends_on", []) deps = _list_of_str(d, errors, f"{path}.depends_on") + required_skill = tr.get("required_skill") + if required_skill is not None and not isinstance(required_skill, str): + errors.append( + f"{path}.required_skill must be str, got {type(required_skill).__name__}" + ) if g and 
t and c and co and v: tasks.append( Task( @@ -154,6 +160,7 @@ def validate_plan(raw: dict) -> Plan: constraints=co, verify=v, depends_on=deps, + required_skill=required_skill, ) ) diff --git a/simplicio/scratch/planner.py b/simplicio/scratch/planner.py index 23a6578..eead1f6 100644 --- a/simplicio/scratch/planner.py +++ b/simplicio/scratch/planner.py @@ -4,6 +4,7 @@ swappable via SIMPLICIO_PLANNER. Retries on schema validation failure with the diff fed back as feedback, up to PLANNER_MAX_RETRIES. """ + from __future__ import annotations import json @@ -37,6 +38,9 @@ class PlannerError(RuntimeError): index, kebab-case slug, max 40 chars after the dash). - `depends_on` lists IDs of tasks that must complete BEFORE this one. Use empty list for tasks with no prerequisite. +- If a task needs project-specific expertise not covered by the stack template, + set optional `required_skill` to a short plain-English capability description. + Omit it otherwise. - `estimated_total_tasks` MUST equal the length of `tasks`. - `project_name` MUST be lowercase kebab-case, starting with a letter. - Do not invent fields. Do not nest extra structure. Do not add comments. 
@@ -110,7 +114,7 @@ def _extract_json(text: str) -> Optional[dict]: elif ch == "}": depth -= 1 if depth == 0 and start is not None: - blob = text[start:i + 1] + blob = text[start : i + 1] try: return json.loads(blob) except json.JSONDecodeError: @@ -156,7 +160,10 @@ def generate_plan( ) try: - raw_text = planner_complete(prompt_with_feedback) + raw_text = planner_complete( + prompt_with_feedback, + template_version=stack.version, + ) except SystemExit as e: # Provider auth / config error — bubble up immediately, don't retry raise PlannerError(f"planner provider error: {e}") from e diff --git a/simplicio/templates/stacks/php-laravel/README.md b/simplicio/templates/stacks/php-laravel/README.md new file mode 100644 index 0000000..1037776 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/README.md @@ -0,0 +1,35 @@ +# php-laravel + +PHP 8.3 + Laravel scaffold for API-first applications where the team wants +Laravel conventions, Composer packages, and PHPUnit feature tests. + +## When to use this stack + +- Laravel or PHP team conventions are required +- Backend API needs routing, controllers, validation, and feature tests +- Project expects Composer packages and Artisan workflows +- CRUD admin or business workflow service with conventional MVC boundaries + +## When NOT to use this stack + +- SSR React application - use `ts-nextjs` +- Systems programming or compact static service - use `rust-axum` or `go-gin` +- Python-first integrations - use `py-fastapi` + +## Layout produced + +``` +/ ++-- app/Http/Controllers/ ++-- bootstrap/app.php ++-- routes/api.php ++-- tests/Feature/ ++-- composer.json ++-- artisan +``` + +## Verify-loop + +- `install`: `composer install` +- `test`: `php artisan test` +- `lint`: `vendor/bin/pint --test` diff --git a/simplicio/templates/stacks/php-laravel/practices.md b/simplicio/templates/stacks/php-laravel/practices.md new file mode 100644 index 0000000..fe75150 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/practices.md @@ 
-0,0 +1,30 @@ +# php-laravel best practices (planner reference) + +## Project structure + +- Put HTTP controllers under `app/Http/Controllers`. +- Register API routes in `routes/api.php`. +- Keep request validation close to controllers until a form request class is + actually reused. +- Use feature tests under `tests/Feature` for API behavior. + +## API design + +- Return JSON responses with explicit status codes. +- Keep route names resource-oriented: `units.index`, `units.store`. +- Prefer Laravel validation helpers before mutating state. +- Keep Eloquent model tasks separate from controller and route tasks. + +## Testing + +- Use `php artisan test` as the default verification command. +- Feature tests should call JSON endpoints and assert status plus response + shape. +- Every route task should add or update a focused feature test. + +## Output the planner SHOULD produce for this stack + +- Tasks order: route -> controller -> model/request -> feature test. +- Each task touches ONE file. 
+- `test_command` = `php artisan test` +- `lint_command` = `vendor/bin/pint --test` diff --git a/simplicio/templates/stacks/php-laravel/stack.json b/simplicio/templates/stacks/php-laravel/stack.json new file mode 100644 index 0000000..c155fc2 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/stack.json @@ -0,0 +1,13 @@ +{ + "slug": "php-laravel", + "template_version": "0.1.0", + "language": "PHP 8.3", + "framework": "Laravel", + "framework_version": "^12.0", + "package_manager": "composer", + "test_runner": "php artisan test", + "linter": "pint", + "deps_required": ["laravel/framework:^12.0"], + "deps_dev": ["phpunit/phpunit:^11.0", "laravel/pint:^1.0"], + "tags": ["web", "api", "php", "laravel", "mvc"] +} diff --git a/simplicio/templates/stacks/php-laravel/tree/README.md b/simplicio/templates/stacks/php-laravel/tree/README.md new file mode 100644 index 0000000..ba43115 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/README.md @@ -0,0 +1,11 @@ +# {project_name} + +Generated with the `php-laravel` Simplicio scratch stack. 
+ +## Commands + +```bash +composer install +php artisan test +vendor/bin/pint --test +``` diff --git a/simplicio/templates/stacks/php-laravel/tree/artisan b/simplicio/templates/stacks/php-laravel/tree/artisan new file mode 100644 index 0000000..a4f9c0f --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/artisan @@ -0,0 +1,14 @@ +#!/usr/bin/env php +handleCommand(new ArgvInput); + +exit($status); diff --git a/simplicio/templates/stacks/php-laravel/tree/bootstrap/app.php b/simplicio/templates/stacks/php-laravel/tree/bootstrap/app.php new file mode 100644 index 0000000..71b547b --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/bootstrap/app.php @@ -0,0 +1,19 @@ +withRouting( + api: __DIR__.'/../routes/api.php', + commands: __DIR__.'/../routes/console.php', + health: '/up', + ) + ->withMiddleware(function (Middleware $middleware): void { + // + }) + ->withExceptions(function (Exceptions $exceptions): void { + // + }) + ->create(); diff --git a/simplicio/templates/stacks/php-laravel/tree/composer.json b/simplicio/templates/stacks/php-laravel/tree/composer.json new file mode 100644 index 0000000..713c8e5 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/composer.json @@ -0,0 +1,29 @@ +{ + "name": "simplicio/{project_name}", + "type": "project", + "description": "Simplicio Laravel scratch project.", + "license": "MIT", + "require": { + "php": "^8.3", + "laravel/framework": "^12.0" + }, + "require-dev": { + "laravel/pint": "^1.0", + "phpunit/phpunit": "^11.0" + }, + "autoload": { + "psr-4": { + "App\\": "app/" + } + }, + "autoload-dev": { + "psr-4": { + "Tests\\": "tests/" + } + }, + "scripts": { + "test": "php artisan test" + }, + "minimum-stability": "stable", + "prefer-stable": true +} diff --git a/simplicio/templates/stacks/php-laravel/tree/phpunit.xml b/simplicio/templates/stacks/php-laravel/tree/phpunit.xml new file mode 100644 index 0000000..92a9446 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/phpunit.xml 
@@ -0,0 +1,17 @@ + + + + + tests/Feature + + + + + + + + + diff --git a/simplicio/templates/stacks/php-laravel/tree/routes/api.php b/simplicio/templates/stacks/php-laravel/tree/routes/api.php new file mode 100644 index 0000000..fe87702 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/routes/api.php @@ -0,0 +1,7 @@ + 'ok']; +})->name('health'); diff --git a/simplicio/templates/stacks/php-laravel/tree/routes/console.php b/simplicio/templates/stacks/php-laravel/tree/routes/console.php new file mode 100644 index 0000000..3d1b87d --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/routes/console.php @@ -0,0 +1,7 @@ +comment('Simplicio Laravel scratch project.'); +}); diff --git a/simplicio/templates/stacks/php-laravel/tree/tests/Feature/HealthTest.php b/simplicio/templates/stacks/php-laravel/tree/tests/Feature/HealthTest.php new file mode 100644 index 0000000..326171a --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/tests/Feature/HealthTest.php @@ -0,0 +1,15 @@ +getJson('/api/health') + ->assertOk() + ->assertJson(['status' => 'ok']); + } +} diff --git a/simplicio/templates/stacks/php-laravel/tree/tests/TestCase.php b/simplicio/templates/stacks/php-laravel/tree/tests/TestCase.php new file mode 100644 index 0000000..fe1ffc2 --- /dev/null +++ b/simplicio/templates/stacks/php-laravel/tree/tests/TestCase.php @@ -0,0 +1,10 @@ +/ ++-- src/main.rs # router, health handler, and entrypoint ++-- Cargo.toml ++-- README.md +``` + +## Verify-loop + +- `install`: `cargo fetch` +- `test`: `cargo test` +- `lint`: `cargo clippy --all-targets -- -D warnings` diff --git a/simplicio/templates/stacks/rust-axum/practices.md b/simplicio/templates/stacks/rust-axum/practices.md new file mode 100644 index 0000000..fd8c739 --- /dev/null +++ b/simplicio/templates/stacks/rust-axum/practices.md @@ -0,0 +1,30 @@ +# rust-axum best practices (planner reference) + +## Project structure + +- Keep the first scaffold small: `src/main.rs` can own router setup until 
the + app grows past a few routes. +- Move handlers into `src/routes/` when a resource needs more than one small + endpoint. +- Keep domain structs and validation near the handler until there is real reuse. + +## API design + +- Build routers with `Router::new().route(...)` and compose nested routers + explicitly. +- Return typed responses such as `Json` and `StatusCode` tuples. +- Derive `Serialize` for response DTOs and `Deserialize` for request DTOs. +- Keep handler functions async and focused on one route concern. + +## Testing + +- Use `tower::ServiceExt::oneshot` with `Request` for handler tests. +- Keep the first health or smoke test in `src/main.rs` with the scaffold. +- Every route task should add or update a `cargo test` passing test. + +## Output the planner SHOULD produce for this stack + +- Tasks order: router setup -> resource DTO -> handlers -> tests. +- Each task touches ONE file. +- `test_command` = `cargo test` +- `lint_command` = `cargo clippy --all-targets -- -D warnings` diff --git a/simplicio/templates/stacks/rust-axum/stack.json b/simplicio/templates/stacks/rust-axum/stack.json new file mode 100644 index 0000000..ed756be --- /dev/null +++ b/simplicio/templates/stacks/rust-axum/stack.json @@ -0,0 +1,13 @@ +{ + "slug": "rust-axum", + "template_version": "0.1.0", + "language": "Rust 1.82", + "framework": "Axum", + "framework_version": "^0.8", + "package_manager": "cargo", + "test_runner": "cargo test", + "linter": "cargo clippy", + "deps_required": ["axum@^0.8", "tokio@^1", "serde@^1"], + "deps_dev": ["tower@^0.5"], + "tags": ["web", "api", "rust", "rest", "compiled"] +} diff --git a/simplicio/templates/stacks/rust-axum/tree/Cargo.toml b/simplicio/templates/stacks/rust-axum/tree/Cargo.toml new file mode 100644 index 0000000..ea2bfe0 --- /dev/null +++ b/simplicio/templates/stacks/rust-axum/tree/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "{project_name}" +version = "0.1.0" +edition = "2021" + +[dependencies] +axum = "0.8" +serde = { version = 
"1", features = ["derive"] } +tokio = { version = "1", features = ["macros", "rt-multi-thread"] } + +[dev-dependencies] +tower = { version = "0.5", features = ["util"] } diff --git a/simplicio/templates/stacks/rust-axum/tree/README.md b/simplicio/templates/stacks/rust-axum/tree/README.md new file mode 100644 index 0000000..73fdcff --- /dev/null +++ b/simplicio/templates/stacks/rust-axum/tree/README.md @@ -0,0 +1,11 @@ +# {project_name} + +Generated with the `rust-axum` Simplicio scratch stack. + +## Commands + +```bash +cargo fetch +cargo test +cargo clippy --all-targets -- -D warnings +``` diff --git a/simplicio/templates/stacks/rust-axum/tree/src/main.rs b/simplicio/templates/stacks/rust-axum/tree/src/main.rs new file mode 100644 index 0000000..13be57b --- /dev/null +++ b/simplicio/templates/stacks/rust-axum/tree/src/main.rs @@ -0,0 +1,43 @@ +use axum::{routing::get, Json, Router}; +use serde::Serialize; + +#[derive(Serialize)] +struct HealthResponse { + status: &'static str, +} + +pub fn app() -> Router { + Router::new().route("/health", get(health)) +} + +async fn health() -> Json { + Json(HealthResponse { status: "ok" }) +} + +#[tokio::main] +async fn main() { + let listener = tokio::net::TcpListener::bind("0.0.0.0:3000") + .await + .expect("bind 0.0.0.0:3000"); + axum::serve(listener, app()).await.expect("serve axum app"); +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::{ + body::Body, + http::{Request, StatusCode}, + }; + use tower::ServiceExt; + + #[tokio::test] + async fn health_returns_ok() { + let response = app() + .oneshot(Request::builder().uri("/health").body(Body::empty()).unwrap()) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::OK); + } +} diff --git a/simplicio/templates/stacks/rust-axum/verify.json b/simplicio/templates/stacks/rust-axum/verify.json new file mode 100644 index 0000000..c058541 --- /dev/null +++ b/simplicio/templates/stacks/rust-axum/verify.json @@ -0,0 +1,6 @@ +{ + "install": "cargo fetch", + 
"test": "cargo test", + "test_runner": "cargo test", + "lint": "cargo clippy --all-targets -- -D warnings" +} diff --git a/tests/python/test_cache.py b/tests/python/test_cache.py index 0114079..7edb223 100644 --- a/tests/python/test_cache.py +++ b/tests/python/test_cache.py @@ -6,7 +6,13 @@ import pytest from simplicio import providers -from simplicio._cache import CacheEntry, CompletionCache, cache, make_key, reset_for_tests +from simplicio._cache import ( + CacheEntry, + CompletionCache, + cache, + make_key, + reset_for_tests, +) from simplicio.cli import main as cli_main @@ -77,6 +83,21 @@ def test_disabled_cache_is_noop(monkeypatch): assert c.stats()["entries"] == 0 +def test_cache_stats_track_session_hit_rate(): + c = CompletionCache() + key = make_key("p", "m", "prompt") + + assert c.get(key) is None + c.put(key, CacheEntry("cached", provider_id="p", model="m")) + assert c.get(key).completion == "cached" + + stats = c.stats() + assert stats["hits"] == 1 + assert stats["misses"] == 1 + assert stats["puts"] == 1 + assert stats["hit_rate"] == 0.5 + + def test_concurrent_writes_keep_valid_json(): c = CompletionCache() key = make_key("p", "m", "prompt") @@ -137,6 +158,33 @@ def test_provider_cache_short_circuits_missing_api_key(monkeypatch): assert providers.generate("cached prompt") == "CACHED" +def test_planner_cache_short_circuits_missing_api_key(monkeypatch): + monkeypatch.setenv("SIMPLICIO_PLANNER", "deepseek/deepseek-v4-pro") + monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False) + cfg = providers.planner_cfg(require_key=False) + key = providers._planner_cache_key( + cfg, + "cached plan", + 8192, + 0.1, + "stack-v1", + ) + cache().put( + key, + CacheEntry( + "CACHED_PLAN", + provider_id=providers._planner_provider_id(cfg), + model=cfg["model"], + ), + ) + + assert providers.planner_complete("cached plan", template_version="stack-v1") == ( + "CACHED_PLAN" + ) + with pytest.raises(SystemExit): + providers.planner_complete("cached plan", 
template_version="stack-v2") + + def test_provider_writes_shell_out_completion_to_cache(monkeypatch): monkeypatch.setenv("SIMPLICIO_MODEL", "claude-cli/sonnet") with patch("subprocess.run") as run: diff --git a/tests/python/test_scratch.py b/tests/python/test_scratch.py index 79ba367..625111a 100644 --- a/tests/python/test_scratch.py +++ b/tests/python/test_scratch.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os import tempfile from pathlib import Path @@ -67,6 +68,39 @@ def test_rejects_duplicate_task_id() -> None: assert any("duplicated" in e for e in exc.value.errors) +def test_accepts_optional_required_skill_on_task() -> None: + raw = { + **EXAMPLE_PLAN, + "tasks": [ + { + **EXAMPLE_PLAN["tasks"][0], + "required_skill": "Create Liquibase migrations safely", + } + ], + } + + plan = validate_plan(raw) + + assert plan.tasks[0].required_skill == "Create Liquibase migrations safely" + + +def test_rejects_non_string_required_skill() -> None: + raw = { + **EXAMPLE_PLAN, + "tasks": [ + { + **EXAMPLE_PLAN["tasks"][0], + "required_skill": ["liquibase"], + } + ], + } + + with pytest.raises(PlanValidationError) as exc: + validate_plan(raw) + + assert any("required_skill" in e for e in exc.value.errors) + + # ----- stack_registry ----- # @@ -76,6 +110,8 @@ def test_registry_lists_pilot_stacks() -> None: assert "py-fastapi" in slugs assert "ts-nextjs" in slugs assert "go-gin" in slugs + assert "rust-axum" in slugs + assert "php-laravel" in slugs def test_registry_loads_full_metadata() -> None: @@ -95,6 +131,8 @@ def test_registry_filters_by_tag() -> None: assert "py-fastapi" in web_stacks assert "ts-nextjs" in web_stacks assert "go-gin" in web_stacks + assert "rust-axum" in web_stacks + assert "php-laravel" in web_stacks def test_registry_loads_go_gin_stack_metadata() -> None: @@ -108,6 +146,64 @@ def test_registry_loads_go_gin_stack_metadata() -> None: assert "best practices" in stack.practices.lower() +def 
test_registry_loads_rust_axum_stack_metadata() -> None: + reg = StackRegistry() + stack = reg.get("rust-axum") + assert stack is not None + assert stack.language.startswith("Rust") + assert stack.framework == "Axum" + assert stack.install_command == "cargo fetch" + assert stack.test_command == "cargo test" + assert "best practices" in stack.practices.lower() + + +def test_registry_loads_php_laravel_stack_metadata() -> None: + reg = StackRegistry() + stack = reg.get("php-laravel") + assert stack is not None + assert stack.language.startswith("PHP") + assert stack.framework == "Laravel" + assert stack.install_command == "composer install" + assert stack.test_command == "php artisan test" + assert "best practices" in stack.practices.lower() + + +def test_rust_axum_stack_renders_cargo_project_name() -> None: + reg = StackRegistry() + stack = reg.get("rust-axum") + assert stack is not None + + with tempfile.TemporaryDirectory() as td: + dest = Path(td) / "out" + written = stack.render_tree(dest, {"project_name": "demo-api"}) + + assert (dest / "Cargo.toml").is_file() + assert (dest / "src/main.rs").is_file() + assert ( + (dest / "Cargo.toml") + .read_text(encoding="utf-8") + .startswith('[package]\nname = "demo-api"') + ) + assert any(path.name == "main.rs" for path in written) + + +def test_php_laravel_stack_renders_composer_project_name() -> None: + reg = StackRegistry() + stack = reg.get("php-laravel") + assert stack is not None + + with tempfile.TemporaryDirectory() as td: + dest = Path(td) / "out" + written = stack.render_tree(dest, {"project_name": "demo-api"}) + + assert (dest / "composer.json").is_file() + assert (dest / "routes/api.php").is_file() + assert '"name": "simplicio/demo-api"' in (dest / "composer.json").read_text( + encoding="utf-8" + ) + assert any(path.name == "HealthTest.php" for path in written) + + def test_stack_render_tree_ignores_tool_cache_dirs() -> None: with tempfile.TemporaryDirectory() as td: root = Path(td) / "stack" @@ -135,6 +231,45 
@@ def test_slugify_project_normalizes_name() -> None: assert slugify_project("123-num-only").startswith("p-") # must start with letter +def test_generate_plan_passes_stack_template_version(monkeypatch) -> None: + from simplicio.scratch import planner as planner_module + + seen = {} + stack = Stack( + slug="custom-stack", + path=Path("."), + meta={ + "language": "Python", + "framework": "FastAPI", + "template_version": "stack-v1", + }, + readme="Custom stack readme", + practices="Custom stack practices", + verify={"test_runner": "pytest"}, + ) + plan_payload = { + **EXAMPLE_PLAN, + "stack": "custom-stack", + "project_name": "cached-plan", + } + + def fake_planner_complete(prompt, **kwargs): + seen["prompt"] = prompt + seen.update(kwargs) + return json.dumps(plan_payload) + + monkeypatch.setattr(planner_module, "planner_complete", fake_planner_complete) + + plan = planner_module.generate_plan( + stack, + "Build a uniquely named planner-cache fixture", + "cached-plan", + ) + + assert plan.project_name == "cached-plan" + assert seen["template_version"] == "stack-v1" + + # ----- executor stub mode ----- # diff --git a/tests/python/test_scratch_codegen.py b/tests/python/test_scratch_codegen.py index 6815c06..bb2b1b2 100644 --- a/tests/python/test_scratch_codegen.py +++ b/tests/python/test_scratch_codegen.py @@ -102,3 +102,41 @@ def test_codegen_fallback_preserves_existing_stub_mode(tmp_path, monkeypatch): assert result.passed is False assert result.skipped_reason == "no SIMPLICIO_MODEL set; task generation skipped" assert "codegen fallback: shape unsupported" in result.log_tail + + +def test_required_skill_generates_review_gated_skill_before_stub_mode( + tmp_path, + monkeypatch, +): + task = _task() + task.required_skill = "Create Liquibase migrations safely" + monkeypatch.setattr(codegen_registry, "_DEFAULT_EXECUTORS", []) + monkeypatch.delenv("SIMPLICIO_MODEL", raising=False) + + from simplicio.scratch import skill_opt + + def fake_generate_skill_doc(description, 
skills_root=None, planner_model=None): + assert description == "Create Liquibase migrations safely" + assert skills_root == tmp_path / ".skills" + return ( + "liquibase-migrations", + "---\n" + "name: liquibase-migrations\n" + "description: Create migrations safely\n" + "auto_generated:\n" + " review_required: true\n" + "---\n" + "# liquibase-migrations\n", + ) + + monkeypatch.setattr(skill_opt, "generate_skill_doc", fake_generate_skill_doc) + + result = _execute_one_task(task, tmp_path, _stack(tmp_path)) + + skill_path = tmp_path / ".skills/liquibase-migrations/SKILL.md" + assert result.execution_mode == "skipped" + assert result.generated_skill == ".skills/liquibase-migrations/SKILL.md" + assert ( + "skill-opt generated .skills/liquibase-migrations/SKILL.md" in result.log_tail + ) + assert "review_required: true" in skill_path.read_text(encoding="utf-8") diff --git a/tests/python/test_scratch_codegen_bench.py b/tests/python/test_scratch_codegen_bench.py new file mode 100644 index 0000000..63d823b --- /dev/null +++ b/tests/python/test_scratch_codegen_bench.py @@ -0,0 +1,47 @@ +"""Tests for the scratch codegen benchmark harness.""" + +from __future__ import annotations + +from bench.run_scratch_codegen import build_cases, run_benchmark, write_reports + + +def test_scratch_codegen_bench_cases_cover_python_executors() -> None: + cases = build_cases(include_typescript=False) + + assert {case.expected_executor for case in cases} == { + "python-add-fastapi-route", + "python-add-orm-field", + "python-add-pydantic-schema", + "python-add-pytest-test", + } + + +def test_scratch_codegen_bench_runs_keyless_python_cases(tmp_path) -> None: + result = run_benchmark( + work_dir=tmp_path / "bench", + repeat=1, + include_typescript=False, + ) + + summary = result["summary"] + assert summary["total_cases"] == 4 + assert summary["passed_cases"] == 4 + assert summary["tasks_codegen"] == 4 + assert summary["llm_calls"] == 0 + assert summary["planner_calls"] == 0 + assert 
summary["release_gates"]["llm_baseline_present"] is False + + +def test_scratch_codegen_bench_writes_reports(tmp_path) -> None: + result = run_benchmark( + work_dir=tmp_path / "bench", + repeat=1, + include_typescript=False, + ) + json_path = tmp_path / "results.json" + md_path = tmp_path / "results.md" + + write_reports(result, json_path, md_path) + + assert '"benchmark": "scratch-codegen"' in json_path.read_text(encoding="utf-8") + assert "# Scratch Codegen Benchmark" in md_path.read_text(encoding="utf-8") diff --git a/tests/python/test_static_fixers_bench.py b/tests/python/test_static_fixers_bench.py new file mode 100644 index 0000000..7527d0b --- /dev/null +++ b/tests/python/test_static_fixers_bench.py @@ -0,0 +1,38 @@ +"""Tests for the static fixer benchmark harness.""" + +from __future__ import annotations + +from bench.run_static_fixers import build_cases, run_benchmark, write_reports + + +def test_static_fixer_bench_cases_have_expected_mix() -> None: + cases = build_cases() + + assert len(cases) == 50 + assert sum(1 for case in cases if case.resolvable) == 40 + assert sum(1 for case in cases if not case.resolvable) == 10 + + +def test_static_fixer_bench_measures_retry_reduction(tmp_path) -> None: + result = run_benchmark(work_dir=tmp_path / "bench") + summary = result["summary"] + + assert summary["total_cases"] == 50 + assert summary["passed_cases"] == 50 + assert summary["fixer_resolved_before_retry"] == 40 + assert summary["fixer_resolved_rate"] == 0.8 + assert summary["baseline_llm_calls"] == 100 + assert summary["with_fixer_llm_calls"] == 60 + assert summary["retry_call_reduction"] == 0.4 + assert summary["release_gates"]["real_scratch_corpus"] is False + + +def test_static_fixer_bench_writes_reports(tmp_path) -> None: + result = run_benchmark(work_dir=tmp_path / "bench") + json_path = tmp_path / "results.json" + md_path = tmp_path / "results.md" + + write_reports(result, json_path, md_path) + + assert '"benchmark": "static-fixers"' in 
json_path.read_text(encoding="utf-8") + assert "# Static Fixers Benchmark" in md_path.read_text(encoding="utf-8")