Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions simplicio/scratch/codegen/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Deterministic code-generation executors for scratch tasks."""

from .registry import register_executor, registered_executors, try_execute
from .types import CodegenResult, TaskExecutor

# Public API of the codegen package: exactly the names imported above from
# the .registry and .types submodules.
__all__ = [
"CodegenResult",
"TaskExecutor",
"register_executor",
"registered_executors",
"try_execute",
]
32 changes: 32 additions & 0 deletions simplicio/scratch/codegen/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Registry for deterministic scratch task executors."""

from __future__ import annotations

from pathlib import Path
from typing import Iterable

from ..plan_schema import Task
from ..stack_registry import Stack
from .types import CodegenResult, TaskExecutor

# Executors consulted by try_execute() when the caller passes no explicit
# iterable; register_executor() appends to this list.
_DEFAULT_EXECUTORS: list[TaskExecutor] = []


def registered_executors() -> list[TaskExecutor]:
    """Return a new list holding the currently registered default executors.

    The result is a shallow copy, so mutating it does not change the
    module-level registry.
    """
    return [*_DEFAULT_EXECUTORS]


def register_executor(executor: TaskExecutor) -> None:
    """Add *executor* to the end of the default executor list.

    No de-duplication is performed: registering the same executor twice
    stores it twice.
    """
    _DEFAULT_EXECUTORS.append(executor)


def try_execute(
    task: Task,
    project_dir: Path,
    stack: Stack,
    executors: Iterable[TaskExecutor] | None = None,
) -> CodegenResult | None:
    """Run *task* through the first executor that accepts it.

    Args:
        task: The task to execute.
        project_dir: Directory passed through to the executor.
        stack: Stack passed to both ``can_handle`` and ``execute``.
        executors: Executors to try, in order. When ``None`` the
            module-level default list is used; an explicitly passed empty
            iterable means "no executors".

    Returns:
        The result of the first executor whose ``can_handle`` returns a
        truthy value, or ``None`` when no executor accepts the task.
    """
    # Compare against None rather than truthiness so an explicit empty
    # iterable does not fall back to the default registry.
    candidates = _DEFAULT_EXECUTORS if executors is None else executors
    for candidate in candidates:
        if not candidate.can_handle(task, stack):
            continue
        # First match wins; later executors are never consulted.
        return candidate.execute(task, project_dir, stack)
    return None
30 changes: 30 additions & 0 deletions simplicio/scratch/codegen/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Shared contracts for deterministic scratch task executors."""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path

from ..plan_schema import Task
from ..stack_registry import Stack


@dataclass
class CodegenResult:
    """Outcome reported by a deterministic task executor.

    Only ``passed`` is required; the other fields default to an empty list,
    an empty string and ``False`` respectively.
    """

    # Whether the executor reports the task as successful.
    passed: bool
    # Paths the executor reports as modified; each instance gets its own list.
    files_modified: list[Path] = field(default_factory=list)
    # Free-form text describing what the executor did.
    log: str = ""
    # Flag an executor sets to ask that the task be handed on to the LLM path.
    fallback_to_llm: bool = False


class TaskExecutor(ABC):
    """Abstract base class for deterministic scratch task executors.

    Concrete executors must implement both ``can_handle`` and ``execute``;
    the class cannot be instantiated until they do.
    """

    # Declared but not assigned here; presumably each subclass sets its own
    # value (verify against concrete executors).
    name: str

    @abstractmethod
    def can_handle(self, task: Task, stack: Stack) -> bool:
        """Report whether this executor can perform *task* mechanically.

        Returns:
            True when the executor is able to handle the task for *stack*.
        """

    @abstractmethod
    def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult:
        """Apply the deterministic edit for *task* and describe the outcome.

        Returns:
            A CodegenResult describing the execution.
        """
32 changes: 30 additions & 2 deletions simplicio/scratch/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

import json
import os
import shutil
import subprocess
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

from .codegen import CodegenResult, try_execute
from .plan_schema import Plan, Task
from .stack_registry import Stack

Expand Down Expand Up @@ -125,14 +125,25 @@ def _execute_one_task(task: Task, project_dir: Path,
can still be smoke-tested. When SIMPLICIO_MODEL IS set, defers to
simplicio.pipeline via the adapter."""
t0 = time.perf_counter()
codegen_log = ""

codegen_result = try_execute(task, project_dir, stack)
if codegen_result is not None:
codegen_log = codegen_result.log
if codegen_result.passed or not codegen_result.fallback_to_llm:
return _task_result_from_codegen(task, t0, codegen_result)

if not os.environ.get("SIMPLICIO_MODEL"):
# smoke-test mode: log the task but mark as skipped (no LLM call made)
ms = int((time.perf_counter() - t0) * 1000)
fallback_note = (
f"codegen fallback: {codegen_log[:200]}\n"
if codegen_log else ""
)
return TaskResult(
id=task.id, target=task.target, passed=False, duration_ms=ms,
skipped_reason="no SIMPLICIO_MODEL set; task generation skipped",
log_tail=f"goal={task.goal[:200]}",
log_tail=f"{fallback_note}goal={task.goal[:200]}",
)

try:
Expand All @@ -145,11 +156,28 @@ def _execute_one_task(task: Task, project_dir: Path,
)

passed, log = run_task(task, project_dir, stack)
if codegen_log:
log = f"codegen fallback: {codegen_log}\n\n{log}"
ms = int((time.perf_counter() - t0) * 1000)
return TaskResult(id=task.id, target=task.target, passed=passed,
duration_ms=ms, log_tail=log)


def _task_result_from_codegen(
    task: Task, started_at: float, result: CodegenResult
) -> TaskResult:
    """Convert a codegen executor result into a TaskResult.

    Args:
        task: Task whose ``id`` and ``target`` are copied to the result.
        started_at: ``time.perf_counter()`` reading taken when work began;
            used to compute the elapsed duration in milliseconds.
        result: Outcome returned by the codegen executor.

    Returns:
        A TaskResult carrying the executor's pass/fail flag and its log,
        followed by a ``files_modified=...`` line when any files were
        reported.
    """
    elapsed_ms = int((time.perf_counter() - started_at) * 1000)
    log_text = f"{result.log}"
    modified = ", ".join(map(str, result.files_modified))
    if modified:
        log_text = f"{log_text}\nfiles_modified={modified}"
    return TaskResult(
        id=task.id,
        target=task.target,
        passed=result.passed,
        duration_ms=elapsed_ms,
        # Strip so an empty log does not leave a leading newline.
        log_tail=log_text.strip(),
    )


def execute_plan(plan: Plan, stack: Stack, parent_dir: Path,
skip_install: bool = False) -> ExecutorReport:
"""Materialize the plan into parent_dir/<project_name>/."""
Expand Down
104 changes: 104 additions & 0 deletions tests/python/test_scratch_codegen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Tests for deterministic scratch codegen executor plumbing."""

from __future__ import annotations

from pathlib import Path

from simplicio.scratch.codegen import CodegenResult, TaskExecutor
from simplicio.scratch.codegen import registry as codegen_registry
from simplicio.scratch.executor import _execute_one_task
from simplicio.scratch.plan_schema import Task
from simplicio.scratch.stack_registry import Stack


def _task() -> Task:
    """Build the sample Task shared by the tests in this module."""
    fields = {
        "id": "T01-codegen",
        "goal": "add deterministic file",
        "target": "src/app.py",
        "criteria": "file exists",
        "constraints": "no llm",
        "verify": "pytest -q",
    }
    return Task(**fields)


def _stack(tmp_path: Path) -> Stack:
    """Build a sample Stack whose path is *tmp_path*."""
    metadata = {"language": "Python", "framework": "FastAPI"}
    return Stack(slug="py-fastapi", path=tmp_path, meta=metadata)


class _Executor(TaskExecutor):
    """Configurable TaskExecutor stub that counts ``execute`` calls."""

    name = "fake"

    def __init__(
        self, *, can_handle: bool = True, result: CodegenResult | None = None
    ) -> None:
        """Store the canned ``can_handle`` answer and ``execute`` result."""
        self.calls = 0
        self._can_handle = can_handle
        # Fall back to a passing result when the test supplies none.
        self._result = result or CodegenResult(passed=True, log="mechanical ok")

    def can_handle(self, task: Task, stack: Stack) -> bool:
        """Return the answer configured at construction time."""
        return self._can_handle

    def execute(self, task: Task, project_dir: Path, stack: Stack) -> CodegenResult:
        """Record the call and return the configured result."""
        self.calls += 1
        return self._result


def test_empty_registry_returns_none(tmp_path, monkeypatch):
    """With an empty default registry, try_execute yields no result."""
    monkeypatch.setattr(codegen_registry, "_DEFAULT_EXECUTORS", [])
    outcome = codegen_registry.try_execute(_task(), tmp_path, _stack(tmp_path))
    assert outcome is None


def test_registry_executes_first_matching_executor(tmp_path):
    """An executor that declines is skipped; the next accepting one runs."""
    declining = _Executor(can_handle=False)
    accepting = _Executor(result=CodegenResult(passed=True, log="matched"))
    candidates = [declining, accepting]

    outcome = codegen_registry.try_execute(
        _task(), tmp_path, _stack(tmp_path), candidates
    )

    assert outcome is not None
    assert outcome.log == "matched"
    assert (declining.calls, accepting.calls) == (0, 1)


def test_successful_codegen_runs_without_model(tmp_path, monkeypatch):
    """A passing codegen result is reported even with SIMPLICIO_MODEL unset."""
    touched = tmp_path / "src/app.py"
    canned = CodegenResult(passed=True, files_modified=[touched], log="done")
    monkeypatch.setattr(
        codegen_registry, "_DEFAULT_EXECUTORS", [_Executor(result=canned)]
    )
    monkeypatch.delenv("SIMPLICIO_MODEL", raising=False)

    outcome = _execute_one_task(_task(), tmp_path, _stack(tmp_path))

    assert outcome.passed is True
    assert outcome.skipped_reason is None
    for fragment in ("done", "files_modified"):
        assert fragment in outcome.log_tail


def test_codegen_failure_without_fallback_does_not_call_llm(tmp_path, monkeypatch):
    """A failed result without the fallback flag is returned as-is."""
    canned = CodegenResult(passed=False, log="missing class")
    monkeypatch.setattr(
        codegen_registry, "_DEFAULT_EXECUTORS", [_Executor(result=canned)]
    )
    # A model is configured, so only the missing fallback flag can explain
    # the codegen log coming back unchanged.
    monkeypatch.setenv("SIMPLICIO_MODEL", "fake-model")

    outcome = _execute_one_task(_task(), tmp_path, _stack(tmp_path))

    assert outcome.passed is False
    assert outcome.skipped_reason is None
    assert outcome.log_tail == "missing class"


def test_codegen_fallback_preserves_existing_stub_mode(tmp_path, monkeypatch):
    """A fallback request with no model set still yields the skipped result."""
    canned = CodegenResult(
        passed=False, log="shape unsupported", fallback_to_llm=True
    )
    monkeypatch.setattr(
        codegen_registry, "_DEFAULT_EXECUTORS", [_Executor(result=canned)]
    )
    monkeypatch.delenv("SIMPLICIO_MODEL", raising=False)

    outcome = _execute_one_task(_task(), tmp_path, _stack(tmp_path))

    expected_reason = "no SIMPLICIO_MODEL set; task generation skipped"
    assert outcome.passed is False
    assert outcome.skipped_reason == expected_reason
    assert "codegen fallback: shape unsupported" in outcome.log_tail