Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [0.4.2] — 2026-05-29

### Added
- `simplicio task --dry-run-task --json` for SendSprint orchestration. It
generates the would-be task output, returns the stable
`{task_id, applied, files_changed, tokens_used, cost_usd, diff_summary,
warnings}` JSON contract, and does not write `.simplicio/last_output.txt` or
run the test/apply loop.
- `simplicio task --bound-paths <glob>` repeatable edit-surface guard. Generated
diffs outside the allowed globs are refused before the test loop and reported
as JSON warnings.

### Changed
- **`rust/simplicio-core`: PyO3 `0.22` → `0.28`** (manual major dependency bump,
Expand Down
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,29 @@ simplicio task "..." --stack angular --target ...

How it works: simplicio shells out to `claude -p "<prompt>"` (or `codex exec "<prompt>"`) as a subprocess, captures stdout, runs the test loop. The inner CLI authenticates via your existing OAuth session in `~/.claude/` or `~/.codex/`. simplicio sets `SIMPLICIO_HOOK_GUARD=1` in the subprocess env so the inner Claude Code session does **not** re-fire simplicio's own UserPromptSubmit hook (no infinite recursion).

For orchestrators such as SendSprint, `simplicio task` also has a structured
contract:

```bash
simplicio task "hide Delete button for non-admins" \
--stack angular \
--target src/app/screen/screen.component.html \
--dry-run-task \
--json

simplicio task "front-only task" \
--stack angular \
--target src/app/screen/screen.component.html \
--bound-paths "src/app/**" \
--json
```

`--dry-run-task` generates the would-be diff/test output without applying or
testing it. `--json` returns `{task_id, applied, files_changed, tokens_used,
cost_usd, diff_summary, warnings}`. Pass `--bound-paths <glob>` (repeatable) to
reject diffs outside the allowed edit surface; violations are reported in
`warnings`, and a `--json` run without `--dry-run-task` exits non-zero.

### Path 3 example — standalone with API key

```bash
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "simplicio-cli"
version = "0.4.1"
version = "0.4.2"
description = "Portable task-to-code pipeline that works with any LLM. Turn a one-line task into a verified code change — diff + test + verify loop. +55 pts on a 156-check benchmark, 21% faster, ~same tokens."
readme = "README.md"
license = { text = "MIT" }
Expand Down
37 changes: 33 additions & 4 deletions simplicio/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from __future__ import annotations

import argparse
import json
import os
import sys
from pathlib import Path
Expand Down Expand Up @@ -51,7 +52,7 @@ def maybe_autoinstall(cmd: str | None) -> bool:
return False


def main():
def main(argv=None):
ap = argparse.ArgumentParser(prog="simplicio")
sub = ap.add_subparsers(dest="cmd", required=True)

Expand All @@ -64,6 +65,12 @@ def main():
pt.add_argument("--target", required=True)
pt.add_argument("--criteria", default="- true state\n- false state")
pt.add_argument("--constraints", default="- build passes")
pt.add_argument("--dry-run-task", action="store_true",
help="generate the would-be task output without applying/testing")
pt.add_argument("--json", action="store_true",
help="emit stable structured task output")
pt.add_argument("--bound-paths", action="append", default=[],
help="glob limiting which paths the task may change; repeatable")


pb = sub.add_parser("bench", help="compare with vs without (real numbers)")
Expand All @@ -82,7 +89,7 @@ def main():
p_det.add_argument("--quiet", action="store_true")
p_det.add_argument("--json", action="store_true")

a = ap.parse_args()
a = ap.parse_args(argv)
maybe_autoinstall(a.cmd)
if a.cmd == "index":
from .precedent import index_repo
Expand Down Expand Up @@ -114,8 +121,30 @@ def main():
argv += ["--json"]
return detect_main(argv)
else:
from .pipeline import run
run(a.root, a.stack, a.goal, a.target, a.criteria, a.constraints)
from .pipeline import run, run_task
if a.json or a.dry_run_task:
result = run_task(
a.root,
a.stack,
a.goal,
a.target,
a.criteria,
a.constraints,
dry_run_task=a.dry_run_task,
bound_paths=a.bound_paths,
quiet=a.json,
)
if a.json:
print(json.dumps(result, sort_keys=True))
else:
status = "DRY-RUN" if a.dry_run_task else "DONE"
print(f"{status}: {result['diff_summary']}")
for warning in result["warnings"]:
print(f"warning: {warning}", file=sys.stderr)
return 0 if (a.dry_run_task or result["applied"]) else 1
run(a.root, a.stack, a.goal, a.target, a.criteria, a.constraints,
bound_paths=a.bound_paths)
return 0

if __name__ == "__main__":
    # main() returns an exit status for the task command (0 on success/dry-run,
    # 1 when nothing was applied). Hand it to sys.exit so running the module
    # directly reports failure the same way a console-script wrapper would;
    # a None return still exits 0.
    sys.exit(main())
106 changes: 95 additions & 11 deletions simplicio/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""pipeline.py — build -> generate -> validate -> test -> fix (loop)."""
from dataclasses import dataclass
import fnmatch
import os, re, subprocess
from .observability import estimate_tokens, log_run
from .prompt import build_prompt
Expand All @@ -18,7 +19,40 @@ class FailureClassification:
kind: str
guidance: str

def validate_generated_output(output):
def extract_changed_files(output):
    """Return the de-duplicated file paths named in a generated diff.

    Paths come from the ``b/`` side of ``diff --git`` headers first, then from
    ``+++ b/`` lines, keeping first-seen order. ``None`` or empty output gives
    an empty list.
    """
    text = output or ""
    git_headers = re.findall(r"^diff --git a/(.+?) b/(.+?)$", text, flags=re.M)
    plus_headers = re.findall(r"^\+\+\+ b/(.+?)$", text, flags=re.M)

    candidates = [new_side.strip() for _old_side, new_side in git_headers]
    candidates += [name.strip() for name in plus_headers]

    seen = set()
    unique = []
    for name in candidates:
        # Skip blanks, the /dev/null sentinel, and anything already recorded.
        if not name or name == "/dev/null" or name in seen:
            continue
        seen.add(name)
        unique.append(name)
    return unique

def _matches_bound(path, patterns):
normalized = path.replace(os.sep, "/").lstrip("./")
for raw in patterns or []:
pattern = str(raw).replace(os.sep, "/").lstrip("./")
if fnmatch.fnmatch(normalized, pattern):
return True
if pattern.endswith("/**"):
prefix = pattern[:-3].rstrip("/")
if normalized == prefix or normalized.startswith(f"{prefix}/"):
return True
return False

def _bound_path_warnings(files, bound_paths):
if not bound_paths:
return []
outside = [path for path in files if not _matches_bound(path, bound_paths)]
if not outside:
return []
return [
"diff touches path outside bound paths: "
+ ", ".join(outside)
+ f" (allowed: {', '.join(bound_paths)})"
]

def validate_generated_output(output, bound_paths=None):
text = output or ""
hints = []
has_diff = bool(re.search(r"^diff --git |^--- .+\n\+\+\+ ", text, flags=re.M))
Expand All @@ -29,6 +63,7 @@ def validate_generated_output(output):
hints.append("include a TEST block or concrete test code")
if re.search(r"(?i)\b(pseudocode|placeholder|todo: implement)\b", text):
hints.append("replace placeholders with executable code")
hints.extend(_bound_path_warnings(extract_changed_files(output), bound_paths))
return ValidationResult(
ok=not hints,
reason="ok" if not hints else "; ".join(hints),
Expand Down Expand Up @@ -64,24 +99,58 @@ def build_retry_feedback(attempt, validation=None, test_log=""):
lines.append("Return the full corrected DIFF + TEST block only.")
return "\n".join(lines)

def _apply_and_test(output, root, bound_paths=None):
    """Persist the generated output, validate it, then run the test command.

    Args:
        output: Generated text (may be ``None``; written as an empty string).
        root: Project root; ``.simplicio/last_output.txt`` is written beneath it
            and the test command runs with this as its working directory.
        bound_paths: Optional globs forwarded to ``validate_generated_output``.

    Returns:
        tuple[bool, str]: ``(ok, log)``. On validation failure ``ok`` is False
        and ``log`` carries the reason; otherwise ``ok`` reflects the test
        command's exit status and ``log`` is the last 2000 characters of its
        combined stdout and stderr.
    """
    os.makedirs(os.path.join(root, ".simplicio"), exist_ok=True)
    # Use a context manager so the handle is flushed and closed before the test
    # command runs, rather than relying on garbage collection.
    with open(os.path.join(root, ".simplicio/last_output.txt"), "w") as handle:
        handle.write(output or "")
    validation = validate_generated_output(output, bound_paths)
    if not validation.ok:
        return False, f"pre-apply validation failed: {validation.reason}"
    # PLUG: extract diff -> git apply; extract test. Here we run the test command.
    # The command is operator-supplied via the environment, hence shell=True.
    cmd = os.environ.get("SIMPLICIO_TEST_CMD", "echo 'configure SIMPLICIO_TEST_CMD'")
    p = subprocess.run(cmd, shell=True, cwd=root, capture_output=True, text=True)
    return p.returncode == 0, (p.stdout + p.stderr)[-2000:]

def run(root, stack, goal, target, criteria, constraints):
def _diff_summary(files_changed):
if not files_changed:
return "no changed files reported"
return "changed " + ", ".join(files_changed)

def _task_result(task_id, prompt, output, *, applied, warnings=None):
    """Assemble the structured result dict for a task run.

    The dict carries the keys ``task_id``, ``applied``, ``files_changed``,
    ``tokens_used`` (``prompt`` / ``completion``), ``cost_usd``,
    ``diff_summary`` and ``warnings``. ``cost_usd`` is always ``0.0`` here, and
    a missing ``warnings`` argument becomes an empty list.
    """
    changed = extract_changed_files(output)
    # Token figures come from estimate_tokens, called for the prompt and for the output.
    token_usage = {
        "prompt": estimate_tokens(prompt),
        "completion": estimate_tokens(output or ""),
    }
    summary = _diff_summary(changed)
    payload = {
        "task_id": task_id,
        "applied": bool(applied),
        "files_changed": changed,
        "tokens_used": token_usage,
        "cost_usd": 0.0,
        "diff_summary": summary,
        "warnings": warnings or [],
    }
    return payload

def run_task(root, stack, goal, target, criteria, constraints, *,
dry_run_task=False, bound_paths=None, quiet=False):
prompt = build_prompt(root, stack, goal, target, criteria, constraints)
if dry_run_task:
output = generate(prompt)
validation = validate_generated_output(output, bound_paths)
warnings = [] if validation.ok else [validation.reason]
return _task_result(target, prompt, output, applied=False, warnings=warnings)

feedback = None
last_output = ""
last_validation = None
last_log = ""
for t in range(1, MAX_ATTEMPTS + 1):
print(f"--- attempt {t} (provider={os.environ.get('SIMPLICIO_PROVIDER','claude')}) ---")
if not quiet:
print(f"--- attempt {t} (provider={os.environ.get('SIMPLICIO_PROVIDER','claude')}) ---")
output = generate(prompt, feedback)
ok, log = _apply_and_test(output, root)
last_output = output or ""
last_validation = validate_generated_output(output, bound_paths)
ok, log = _apply_and_test(output, root, bound_paths)
last_log = log
log_run(root, {
"mode": "pipeline",
"attempt": t,
Expand All @@ -92,9 +161,24 @@ def run(root, stack, goal, target, criteria, constraints):
"stack": stack,
})
if ok:
print("PASSED the contract. DONE.")
return output
print("failed:", log[:300])
feedback = build_retry_feedback(t + 1, validate_generated_output(output), log)
print("attempts exhausted — manual review needed.")
if not quiet:
print("PASSED the contract. DONE.")
return _task_result(target, prompt, output, applied=True)
if not quiet:
print("failed:", log[:300])
feedback = build_retry_feedback(t + 1, last_validation, log)
if not quiet:
print("attempts exhausted — manual review needed.")
warnings = []
if last_validation and not last_validation.ok:
warnings.append(last_validation.reason)
elif last_log:
warnings.append(last_log[:500])
return _task_result(target, prompt, last_output, applied=False, warnings=warnings)

def run(root, stack, goal, target, criteria, constraints, bound_paths=None):
    """Run the task pipeline; return its result dict only if it was applied.

    Delegates to ``run_task`` and returns ``None`` when the result's
    ``applied`` flag is falsy.
    """
    outcome = run_task(
        root,
        stack,
        goal,
        target,
        criteria,
        constraints,
        bound_paths=bound_paths,
    )
    return outcome if outcome["applied"] else None
108 changes: 108 additions & 0 deletions tests/python/test_task_json_contract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import json

from simplicio import cli


def _write(path, text):
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(text, encoding="utf-8")


def _diff(path):
return "\n".join([
f"diff --git a/{path} b/{path}",
f"--- a/{path}",
f"+++ b/{path}",
"@@ -1 +1 @@",
"-old",
"+new",
"",
"TEST:",
"assert True",
])


def test_task_dry_run_json_does_not_touch_worktree(tmp_path, monkeypatch, capsys):
    """--dry-run-task --json prints the contract and leaves the tree untouched."""
    source_file = tmp_path / "frontend" / "app.ts"
    _write(source_file, "old\n")
    monkeypatch.setenv("SIMPLICIO_SKIP_AUTO_INIT", "1")
    # Stub the generator so the run is deterministic and offline.
    monkeypatch.setattr("simplicio.pipeline.generate", lambda *a, **k: _diff("frontend/app.ts"))

    argv = ["task", "update app"]
    argv += ["--root", str(tmp_path)]
    argv += ["--stack", "angular"]
    argv += ["--target", "frontend/app.ts"]
    argv += ["--dry-run-task", "--json"]
    exit_code = cli.main(argv)

    assert exit_code == 0
    payload = json.loads(capsys.readouterr().out)
    assert payload["task_id"] == "frontend/app.ts"
    assert payload["applied"] is False
    assert payload["files_changed"] == ["frontend/app.ts"]
    usage = payload["tokens_used"]
    assert usage["prompt"] > 0
    assert usage["completion"] > 0
    assert payload["cost_usd"] == 0.0
    assert payload["warnings"] == []
    # Dry-run must neither persist the output nor modify the target file.
    assert not (tmp_path / ".simplicio" / "last_output.txt").exists()
    assert source_file.read_text(encoding="utf-8") == "old\n"


def test_task_json_reports_normal_run(tmp_path, monkeypatch, capsys):
    """A passing non-dry run with --json reports applied=True and exits 0."""
    _write(tmp_path / "frontend" / "app.ts", "old\n")
    for name, value in (("SIMPLICIO_SKIP_AUTO_INIT", "1"), ("SIMPLICIO_TEST_CMD", "true")):
        monkeypatch.setenv(name, value)
    # Stub the generator so the run is deterministic and offline.
    monkeypatch.setattr("simplicio.pipeline.generate", lambda *a, **k: _diff("frontend/app.ts"))

    argv = ["task", "update app"]
    argv += ["--root", str(tmp_path)]
    argv += ["--stack", "angular"]
    argv += ["--target", "frontend/app.ts"]
    argv += ["--json"]
    exit_code = cli.main(argv)

    assert exit_code == 0
    payload = json.loads(capsys.readouterr().out)
    assert payload["applied"] is True
    assert payload["files_changed"] == ["frontend/app.ts"]
    assert "frontend/app.ts" in payload["diff_summary"]
    assert payload["warnings"] == []
    # A real run persists the generated output under .simplicio/.
    assert (tmp_path / ".simplicio" / "last_output.txt").exists()


def test_task_bound_paths_refuses_out_of_scope_diff(tmp_path, monkeypatch, capsys):
    """A diff touching a path outside --bound-paths is refused with a warning."""
    for area in ("frontend", "backend"):
        _write(tmp_path / area / "app.ts", "old\n")
    for name, value in (("SIMPLICIO_SKIP_AUTO_INIT", "1"), ("SIMPLICIO_TEST_CMD", "true")):
        monkeypatch.setenv(name, value)
    # The stubbed generator edits backend/ while only frontend/** is allowed.
    monkeypatch.setattr("simplicio.pipeline.generate", lambda *a, **k: _diff("backend/app.ts"))

    argv = ["task", "update app"]
    argv += ["--root", str(tmp_path)]
    argv += ["--stack", "angular"]
    argv += ["--target", "frontend/app.ts"]
    argv += ["--bound-paths", "frontend/**"]
    argv += ["--json"]
    exit_code = cli.main(argv)

    assert exit_code == 1
    payload = json.loads(capsys.readouterr().out)
    assert payload["applied"] is False
    assert payload["files_changed"] == ["backend/app.ts"]
    assert any("outside bound paths" in warning for warning in payload["warnings"])
Loading