diff --git a/CHANGELOG.md b/CHANGELOG.md index 6eb86c0..a9659c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.4.2] — 2026-05-29 + +### Added +- `simplicio task --dry-run-task --json` for SendSprint orchestration. It + generates the would-be task output, returns the stable + `{task_id, applied, files_changed, tokens_used, cost_usd, diff_summary, + warnings}` JSON contract, and does not write `.simplicio/last_output.txt` or + run the test/apply loop. +- `simplicio task --bound-paths <glob>` repeatable edit-surface guard. Generated + diffs outside the allowed globs are refused before the test loop and reported + as JSON warnings. ### Changed - **`rust/simplicio-core`: PyO3 `0.22` → `0.28`** (manual major dependency bump, diff --git a/README.md b/README.md index b958ac6..afab07f 100644 --- a/README.md +++ b/README.md @@ -363,6 +363,29 @@ simplicio task "..." --stack angular --target ... How it works: simplicio shells out to `claude -p ""` (or `codex exec ""`) as a subprocess, captures stdout, runs the test loop. The inner CLI authenticates via your existing OAuth session in `~/.claude/` or `~/.codex/`. simplicio sets `SIMPLICIO_HOOK_GUARD=1` in the subprocess env so the inner Claude Code session does **not** re-fire simplicio's own UserPromptSubmit hook (no infinite recursion). 
+For orchestrators such as SendSprint, `simplicio task` also has a structured +contract: + +```bash +simplicio task "hide Delete button for non-admins" \ + --stack angular \ + --target src/app/screen/screen.component.html \ + --dry-run-task \ + --json + +simplicio task "front-only task" \ + --stack angular \ + --target src/app/screen/screen.component.html \ + --bound-paths "src/app/**" \ + --json +``` + +`--dry-run-task` generates the would-be diff/test output without applying or +testing it. `--json` returns `{task_id, applied, files_changed, tokens_used, +cost_usd, diff_summary, warnings}`. Repeat `--bound-paths <glob>` to reject +diffs outside the allowed edit surface; violations are reported in `warnings` +and, with `--json` and without `--dry-run-task`, the command exits non-zero. + ### Path 3 example — standalone with API key ```bash diff --git a/pyproject.toml b/pyproject.toml index 244b1c5..fafaccb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "simplicio-cli" -version = "0.4.1" +version = "0.4.2" description = "Portable task-to-code pipeline that works with any LLM. Turn a one-line task into a verified code change — diff + test + verify loop. +55 pts on a 156-check benchmark, 21% faster, ~same tokens." 
readme = "README.md" license = { text = "MIT" } diff --git a/simplicio/cli.py b/simplicio/cli.py index dfb5e50..eafbdad 100644 --- a/simplicio/cli.py +++ b/simplicio/cli.py @@ -12,6 +12,7 @@ from __future__ import annotations import argparse +import json import os import sys from pathlib import Path @@ -51,7 +52,7 @@ def maybe_autoinstall(cmd: str | None) -> bool: return False -def main(): +def main(argv=None): ap = argparse.ArgumentParser(prog="simplicio") sub = ap.add_subparsers(dest="cmd", required=True) @@ -64,6 +65,12 @@ def main(): pt.add_argument("--target", required=True) pt.add_argument("--criteria", default="- true state\n- false state") pt.add_argument("--constraints", default="- build passes") + pt.add_argument("--dry-run-task", action="store_true", + help="generate the would-be task output without applying/testing") + pt.add_argument("--json", action="store_true", + help="emit stable structured task output") + pt.add_argument("--bound-paths", action="append", default=[], + help="glob limiting which paths the task may change; repeatable") pb = sub.add_parser("bench", help="compare with vs without (real numbers)") @@ -82,7 +89,7 @@ def main(): p_det.add_argument("--quiet", action="store_true") p_det.add_argument("--json", action="store_true") - a = ap.parse_args() + a = ap.parse_args(argv) maybe_autoinstall(a.cmd) if a.cmd == "index": from .precedent import index_repo @@ -114,8 +121,30 @@ def main(): argv += ["--json"] return detect_main(argv) else: - from .pipeline import run - run(a.root, a.stack, a.goal, a.target, a.criteria, a.constraints) + from .pipeline import run, run_task + if a.json or a.dry_run_task: + result = run_task( + a.root, + a.stack, + a.goal, + a.target, + a.criteria, + a.constraints, + dry_run_task=a.dry_run_task, + bound_paths=a.bound_paths, + quiet=a.json, + ) + if a.json: + print(json.dumps(result, sort_keys=True)) + else: + status = "DRY-RUN" if a.dry_run_task else "DONE" + print(f"{status}: {result['diff_summary']}") + for 
warning in result["warnings"]: + print(f"warning: {warning}", file=sys.stderr) + return 0 if (a.dry_run_task or result["applied"]) else 1 + run(a.root, a.stack, a.goal, a.target, a.criteria, a.constraints, + bound_paths=a.bound_paths) + return 0 if __name__ == "__main__": main() diff --git a/simplicio/pipeline.py b/simplicio/pipeline.py index b94c2f1..f1edd21 100644 --- a/simplicio/pipeline.py +++ b/simplicio/pipeline.py @@ -1,5 +1,6 @@ """pipeline.py — build -> generate -> validate -> test -> fix (loop).""" from dataclasses import dataclass +import fnmatch import os, re, subprocess from .observability import estimate_tokens, log_run from .prompt import build_prompt @@ -18,7 +19,40 @@ class FailureClassification: kind: str guidance: str -def validate_generated_output(output): +def extract_changed_files(output): + text = output or "" + files = [] + for match in re.finditer(r"^diff --git a/(.+?) b/(.+?)$", text, flags=re.M): + files.append(match.group(2).strip()) + for match in re.finditer(r"^\+\+\+ b/(.+?)$", text, flags=re.M): + files.append(match.group(1).strip()) + return list(dict.fromkeys(f for f in files if f and f != "/dev/null")) + +def _matches_bound(path, patterns): + normalized = path.replace(os.sep, "/").lstrip("./") + for raw in patterns or []: + pattern = str(raw).replace(os.sep, "/").lstrip("./") + if fnmatch.fnmatch(normalized, pattern): + return True + if pattern.endswith("/**"): + prefix = pattern[:-3].rstrip("/") + if normalized == prefix or normalized.startswith(f"{prefix}/"): + return True + return False + +def _bound_path_warnings(files, bound_paths): + if not bound_paths: + return [] + outside = [path for path in files if not _matches_bound(path, bound_paths)] + if not outside: + return [] + return [ + "diff touches path outside bound paths: " + + ", ".join(outside) + + f" (allowed: {', '.join(bound_paths)})" + ] + +def validate_generated_output(output, bound_paths=None): text = output or "" hints = [] has_diff = bool(re.search(r"^diff --git 
|^--- .+\n\+\+\+ ", text, flags=re.M)) @@ -29,6 +63,7 @@ def validate_generated_output(output): hints.append("include a TEST block or concrete test code") if re.search(r"(?i)\b(pseudocode|placeholder|todo: implement)\b", text): hints.append("replace placeholders with executable code") + hints.extend(_bound_path_warnings(extract_changed_files(output), bound_paths)) return ValidationResult( ok=not hints, reason="ok" if not hints else "; ".join(hints), @@ -64,10 +99,10 @@ def build_retry_feedback(attempt, validation=None, test_log=""): lines.append("Return the full corrected DIFF + TEST block only.") return "\n".join(lines) -def _apply_and_test(output, root): +def _apply_and_test(output, root, bound_paths=None): os.makedirs(os.path.join(root, ".simplicio"), exist_ok=True) open(os.path.join(root, ".simplicio/last_output.txt"), "w").write(output or "") - validation = validate_generated_output(output) + validation = validate_generated_output(output, bound_paths) if not validation.ok: return False, f"pre-apply validation failed: {validation.reason}" # PLUG: extract diff -> git apply; extract test. Here we run the test command. 
@@ -75,13 +110,47 @@ def _apply_and_test(output, root): p = subprocess.run(cmd, shell=True, cwd=root, capture_output=True, text=True) return p.returncode == 0, (p.stdout + p.stderr)[-2000:] -def run(root, stack, goal, target, criteria, constraints): +def _diff_summary(files_changed): + if not files_changed: + return "no changed files reported" + return "changed " + ", ".join(files_changed) + +def _task_result(task_id, prompt, output, *, applied, warnings=None): + files_changed = extract_changed_files(output) + return { + "task_id": task_id, + "applied": bool(applied), + "files_changed": files_changed, + "tokens_used": { + "prompt": estimate_tokens(prompt), + "completion": estimate_tokens(output or ""), + }, + "cost_usd": 0.0, + "diff_summary": _diff_summary(files_changed), + "warnings": warnings or [], + } + +def run_task(root, stack, goal, target, criteria, constraints, *, + dry_run_task=False, bound_paths=None, quiet=False): prompt = build_prompt(root, stack, goal, target, criteria, constraints) + if dry_run_task: + output = generate(prompt) + validation = validate_generated_output(output, bound_paths) + warnings = [] if validation.ok else [validation.reason] + return _task_result(target, prompt, output, applied=False, warnings=warnings) + feedback = None + last_output = "" + last_validation = None + last_log = "" for t in range(1, MAX_ATTEMPTS + 1): - print(f"--- attempt {t} (provider={os.environ.get('SIMPLICIO_PROVIDER','claude')}) ---") + if not quiet: + print(f"--- attempt {t} (provider={os.environ.get('SIMPLICIO_PROVIDER','claude')}) ---") output = generate(prompt, feedback) - ok, log = _apply_and_test(output, root) + last_output = output or "" + last_validation = validate_generated_output(output, bound_paths) + ok, log = _apply_and_test(output, root, bound_paths) + last_log = log log_run(root, { "mode": "pipeline", "attempt": t, @@ -92,9 +161,24 @@ def run(root, stack, goal, target, criteria, constraints): "stack": stack, }) if ok: - print("PASSED the 
contract. DONE.") - return output - print("failed:", log[:300]) - feedback = build_retry_feedback(t + 1, validate_generated_output(output), log) - print("attempts exhausted — manual review needed.") + if not quiet: + print("PASSED the contract. DONE.") + return _task_result(target, prompt, output, applied=True) + if not quiet: + print("failed:", log[:300]) + feedback = build_retry_feedback(t + 1, last_validation, log) + if not quiet: + print("attempts exhausted — manual review needed.") + warnings = [] + if last_validation and not last_validation.ok: + warnings.append(last_validation.reason) + elif last_log: + warnings.append(last_log[:500]) + return _task_result(target, prompt, last_output, applied=False, warnings=warnings) + +def run(root, stack, goal, target, criteria, constraints, bound_paths=None): + result = run_task(root, stack, goal, target, criteria, constraints, + bound_paths=bound_paths) + if result["applied"]: + return result return None diff --git a/tests/python/test_task_json_contract.py b/tests/python/test_task_json_contract.py new file mode 100644 index 0000000..6e9253e --- /dev/null +++ b/tests/python/test_task_json_contract.py @@ -0,0 +1,108 @@ +import json + +from simplicio import cli + + +def _write(path, text): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +def _diff(path): + return "\n".join([ + f"diff --git a/{path} b/{path}", + f"--- a/{path}", + f"+++ b/{path}", + "@@ -1 +1 @@", + "-old", + "+new", + "", + "TEST:", + "assert True", + ]) + + +def test_task_dry_run_json_does_not_touch_worktree(tmp_path, monkeypatch, capsys): + _write(tmp_path / "frontend" / "app.ts", "old\n") + monkeypatch.setenv("SIMPLICIO_SKIP_AUTO_INIT", "1") + monkeypatch.setattr("simplicio.pipeline.generate", lambda *a, **k: _diff("frontend/app.ts")) + + code = cli.main([ + "task", + "update app", + "--root", + str(tmp_path), + "--stack", + "angular", + "--target", + "frontend/app.ts", + "--dry-run-task", + "--json", + ]) 
+ + assert code == 0 + payload = json.loads(capsys.readouterr().out) + assert payload["task_id"] == "frontend/app.ts" + assert payload["applied"] is False + assert payload["files_changed"] == ["frontend/app.ts"] + assert payload["tokens_used"]["prompt"] > 0 + assert payload["tokens_used"]["completion"] > 0 + assert payload["cost_usd"] == 0.0 + assert payload["warnings"] == [] + assert not (tmp_path / ".simplicio" / "last_output.txt").exists() + assert (tmp_path / "frontend" / "app.ts").read_text(encoding="utf-8") == "old\n" + + +def test_task_json_reports_normal_run(tmp_path, monkeypatch, capsys): + _write(tmp_path / "frontend" / "app.ts", "old\n") + monkeypatch.setenv("SIMPLICIO_SKIP_AUTO_INIT", "1") + monkeypatch.setenv("SIMPLICIO_TEST_CMD", "true") + monkeypatch.setattr("simplicio.pipeline.generate", lambda *a, **k: _diff("frontend/app.ts")) + + code = cli.main([ + "task", + "update app", + "--root", + str(tmp_path), + "--stack", + "angular", + "--target", + "frontend/app.ts", + "--json", + ]) + + assert code == 0 + payload = json.loads(capsys.readouterr().out) + assert payload["applied"] is True + assert payload["files_changed"] == ["frontend/app.ts"] + assert "frontend/app.ts" in payload["diff_summary"] + assert payload["warnings"] == [] + assert (tmp_path / ".simplicio" / "last_output.txt").exists() + + +def test_task_bound_paths_refuses_out_of_scope_diff(tmp_path, monkeypatch, capsys): + _write(tmp_path / "frontend" / "app.ts", "old\n") + _write(tmp_path / "backend" / "app.ts", "old\n") + monkeypatch.setenv("SIMPLICIO_SKIP_AUTO_INIT", "1") + monkeypatch.setenv("SIMPLICIO_TEST_CMD", "true") + monkeypatch.setattr("simplicio.pipeline.generate", lambda *a, **k: _diff("backend/app.ts")) + + code = cli.main([ + "task", + "update app", + "--root", + str(tmp_path), + "--stack", + "angular", + "--target", + "frontend/app.ts", + "--bound-paths", + "frontend/**", + "--json", + ]) + + assert code == 1 + payload = json.loads(capsys.readouterr().out) + assert 
payload["applied"] is False + assert payload["files_changed"] == ["backend/app.ts"] + assert any("outside bound paths" in warning for warning in payload["warnings"])