Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 11 additions & 41 deletions .crane/scripts/score.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
knownExceptions := knownExceptionsFromEnv(getenv("APM_KNOWN_EXCEPTIONS"))
pythonReference := BoolGate{}
pythonTests := BoolGate{Seen: getenv("APM_PYTHON_TESTS") != "", Passed: getenv("APM_PYTHON_TESTS") == "pass"}
benchmarks := BoolGate{Seen: getenv("APM_BENCHMARKS") != "", Passed: getenv("APM_BENCHMARKS") == "pass"}
benchmarks := RatioGate{}
surface := RatioGate{}
help := RatioGate{}
functional := RatioGate{}
Expand Down Expand Up @@ -224,25 +224,25 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
pythonReference = BoolGate{Seen: true, Passed: testPassed(passed, failed, "TestParityCompletionHardGate") || pythonReferenceReady(getenv("APM_PYTHON_BIN"))}
}
if !surface.Seen {
surface = inferredAnyRatioGate(passed, failed, "TestParityCompletionSurfaceParity", "TestParitySurfaceInventory")
surface = missingRatioGate()
}
if !help.Seen {
help = inferredAllRatioGate(passed, failed, "TestParityCompletionCommandMatrix", "TestParityCompletionHelpIdentical")
help = missingRatioGate()
}
if !functional.Seen {
functional = inferredAnyRatioGate(passed, failed, "TestParityCompletionFunctionalContracts", "TestParityFunctionalContracts")
functional = missingRatioGate()
}
if !stateDiff.Seen {
stateDiff = inferredAnyRatioGate(passed, failed, "TestParityCompletionStateDiffContracts", "TestParityStateDiffContracts")
stateDiff = missingRatioGate()
}
if !behaviorContracts.Seen {
behaviorContracts = RatioGate{Seen: true, Passing: 0, Total: 1}
behaviorContracts = missingRatioGate()
}
if !pythonTests.Seen {
pythonTests = BoolGate{Seen: true, Passed: testPassed(passed, failed, "TestParityCompletionPythonSuite")}
}
if !benchmarks.Seen {
benchmarks = BoolGate{Seen: true, Passed: testPassed(passed, failed, "TestParityCompletionBenchmarks")}
benchmarks = missingRatioGate()
}

goTestsPass := !goTestsFailed && targetTotal > 0 && targetPassing == targetTotal
Expand Down Expand Up @@ -346,7 +346,7 @@ func applyGateEvent(
behaviorContracts *RatioGate,
knownExceptions *int,
pythonTests *BoolGate,
benchmarks *BoolGate,
benchmarks *RatioGate,
) {
switch gate.Name {
case "python_reference":
Expand All @@ -366,7 +366,7 @@ func applyGateEvent(
case "python_tests":
*pythonTests = BoolGate{Seen: true, Passed: gate.Passed}
case "benchmarks":
*benchmarks = BoolGate{Seen: true, Passed: gate.Passed}
*benchmarks = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
}
}

Expand Down Expand Up @@ -399,31 +399,8 @@ func testPassed(passed, failed map[string]bool, names ...string) bool {
return false
}

// inferredAnyRatioGate derives a one-item ratio gate from named test results.
// If any listed name is recorded in failed, the gate is 0 of 1. Otherwise the
// gate is 1 of 1 exactly when testPassed reports true for the listed names,
// and 0 of 1 when it does not.
func inferredAnyRatioGate(passed, failed map[string]bool, names ...string) RatioGate {
	gate := RatioGate{Seen: true, Total: 1}
	for i := range names {
		if failed[names[i]] {
			// A recorded failure settles the gate; Passing stays 0.
			return gate
		}
	}
	gate.Passing = boolToInt(testPassed(passed, failed, names...))
	return gate
}

// inferredAllRatioGate derives a one-item ratio gate from named test results.
// If any listed name is recorded in failed, the gate is 0 of 1. Otherwise the
// gate is 1 of 1 only when allRequiredTestsPassed reports true for the listed
// names, and 0 of 1 when it does not.
func inferredAllRatioGate(passed, failed map[string]bool, names ...string) RatioGate {
	gate := RatioGate{Seen: true, Total: 1}
	for i := range names {
		if failed[names[i]] {
			// A recorded failure settles the gate; Passing stays 0.
			return gate
		}
	}
	gate.Passing = boolToInt(allRequiredTestsPassed(passed, names...))
	return gate
}

func allRequiredTestsPassed(passed map[string]bool, names ...string) bool {
for _, name := range names {
if !passed[name] {
return false
}
}
return true
// missingRatioGate returns the 0-of-1 ratio gate that callers substitute when
// a gate was never seen: it is marked Seen with nothing passing out of one.
func missingRatioGate() RatioGate {
	gate := RatioGate{Seen: true}
	gate.Passing = 0
	gate.Total = 1
	return gate
}

func gateResults(gates CutoverGates) []GateResult {
Expand All @@ -448,13 +425,6 @@ func passFail(ok bool) string {
return "fail"
}

// boolToInt maps a boolean onto an integer: 1 for true, 0 for false.
func boolToInt(ok bool) int {
	if !ok {
		return 0
	}
	return 1
}

func knownExceptionsFromEnv(raw string) int {
if raw == "" {
return 0
Expand Down
57 changes: 53 additions & 4 deletions .github/workflows/migration-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ on:
pull_request:
branches: [main]
workflow_dispatch:
inputs:
enforce_completion:
description: "Fail unless migration completion gates are fully satisfied"
required: false
default: false
type: boolean

permissions:
contents: read
Expand Down Expand Up @@ -99,6 +105,18 @@ jobs:
- name: Run Go parity tests
shell: bash
run: |
# Decide whether completion gates are enforced for this run: either a manual
# dispatch that opted in, or a pull request whose head branch is under crane/.
enforce_completion=false
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ inputs.enforce_completion == true }}" = "true" ]; then
  enforce_completion=true
# The head branch name is attacker-controlled, so it is read from the
# runner-provided GITHUB_HEAD_REF variable rather than being interpolated into
# the script text, where a crafted branch name would be executed as shell code.
elif [ "${{ github.event_name }}" = "pull_request" ] && [[ "${GITHUB_HEAD_REF:-}" == crane/* ]]; then
  enforce_completion=true
fi

echo "MIGRATION_COMPLETION_ENFORCED=$enforce_completion" >> "$GITHUB_ENV"
if [ "$enforce_completion" = "true" ]; then
export APM_ENFORCE_COMPLETION_GATES=1
fi

set +e
go test -json ./... | tee "$RUNNER_TEMP/go-test-events.json"
status=${PIPESTATUS[0]}
Expand All @@ -113,21 +131,38 @@ jobs:
--coverage tests/parity/python_contract_coverage.yml \
--allow-intentionally-incomplete \
--summary "$RUNNER_TEMP/python-contract-coverage.md" || true
python - "$RUNNER_TEMP/migration-score.json" <<'PY'
python - "$RUNNER_TEMP/migration-score.json" "${MIGRATION_COMPLETION_ENFORCED:-false}" <<'PY'
import json
import sys

with open(sys.argv[1], encoding="utf-8") as fh:
score = json.load(fh)
enforce_completion = sys.argv[2].lower() == "true"

print(json.dumps(score, indent=2, sort_keys=True))
if not enforce_completion:
print(
"::notice::Non-enforcing migration evidence run; "
"completion gates are enforced only for crane/* PRs and "
"manual runs with enforce_completion=true."
)
raise SystemExit(0)
if score.get("progress") != 1.0:
raise SystemExit("progress must be 1.0 for completion parity")
if score.get("migration_score") == 1.0 and not score.get("deletion_grade_ready"):
raise SystemExit("migration_score 1.0 requires deletion_grade_ready")
PY
test "${PYTHON_CLI_CONTRACT_STATUS:-1}" = "0"
test "${GO_TEST_STATUS:-1}" = "0"
if [ "${MIGRATION_COMPLETION_ENFORCED:-false}" = "true" ]; then
test "${PYTHON_CLI_CONTRACT_STATUS:-1}" = "0"
test "${GO_TEST_STATUS:-1}" = "0"
else
if [ "${PYTHON_CLI_CONTRACT_STATUS:-1}" != "0" ]; then
echo "::notice::Python behavior contract tests are incomplete in collection mode."
fi
if [ "${GO_TEST_STATUS:-1}" != "0" ]; then
echo "::notice::Go parity tests are incomplete in collection mode."
fi
fi

- name: Upload parity evidence
if: always()
Expand Down Expand Up @@ -171,13 +206,27 @@ jobs:
run: go build -o "$RUNNER_TEMP/apm-go" ./cmd/apm

- name: Run Python-vs-Go CLI benchmark
shell: bash
run: |
# Decide whether benchmark failures are enforced for this run: either a manual
# dispatch that opted in, or a pull request whose head branch is under crane/.
enforce_completion=false
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ inputs.enforce_completion == true }}" = "true" ]; then
  enforce_completion=true
# The head branch name is attacker-controlled, so it is read from the
# runner-provided GITHUB_HEAD_REF variable rather than being interpolated into
# the script text, where a crafted branch name would be executed as shell code.
elif [ "${{ github.event_name }}" = "pull_request" ] && [[ "${GITHUB_HEAD_REF:-}" == crane/* ]]; then
  enforce_completion=true
fi

extra_args=()
if [ "$enforce_completion" != "true" ]; then
extra_args+=(--allow-failures)
fi

python scripts/ci/migration_cli_benchmark.py \
--python-bin "$GITHUB_WORKSPACE/.venv/bin/apm" \
--go-bin "$RUNNER_TEMP/apm-go" \
--json-out "$RUNNER_TEMP/migration-cli-benchmark.json" \
--markdown-out "$RUNNER_TEMP/migration-cli-benchmark.md" \
--max-ratio 5.0
--max-ratio 5.0 \
"${extra_args[@]}"

- name: Run Python scaling guards
run: uv run pytest tests/benchmarks/test_scaling_guards.py -v
Expand Down
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,20 @@ Maintainers can dispatch the migration workflow manually:
gh workflow run migration-ci.yml --repo githubnext/apm --ref main
```

That default manual run collects parity and benchmark evidence without treating
known migration gaps as a CI failure. To run the deterministic hard completion
gate, opt in explicitly:

```bash
gh workflow run migration-ci.yml --repo githubnext/apm --ref main -f enforce_completion=true
```

After it runs, open the **Migration Benchmarks** job summary for the timing
table. The same run uploads the `migration-benchmark-evidence` artifact with
JSON and Markdown copies of the benchmark data. In the benchmark table, the
`Go/Python` ratio is the Go median duration divided by the Python median
duration: values below `1.00x` mean Go is faster. Recent smoke benchmark
evidence for startup/help/init-style commands shows the Go CLI roughly
`327x`-`370x` faster than the Python CLI.
duration: values below `1.00x` mean Go is faster. The benchmark includes
fixture-backed commands that read, write, execute, or fail against realistic APM
project state: `apm.yml`, `apm.lock.yaml`, installed `apm_modules`, local
`.apm` primitives, target directories, deployed prompt files, and sample source
files.
64 changes: 60 additions & 4 deletions cmd/apm/CUTOVER.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,59 @@ The Go CLI currently implements:
- `apm init [--yes] [PROJECT_NAME]` (functional, creates apm.yml)
- Per-command `--help` for all 26 commands (golden-file verified)

Remaining commands return a "not yet fully implemented" message.
Most remaining commands are wired at the CLI surface. That is not enough for
cutover. A command that prints success without writing the expected files,
mutating `apm.yml`, updating `apm.lock.yaml`, executing a script, or detecting a
planted failure is still incomplete.

## Real Criteria

Every completion criterion must be backed by real command execution. The scorer
does not infer completion from test names for `surface`, `help`, `functional`,
`state_diff`, `python_behavior_contracts`, or `benchmarks`; each one must emit an
explicit ratio gate.

Crane must run `go test ./cmd/apm -run TestParityRealFunctionalAndStateDiffContracts -json`.
That fixture-backed test executes the built Go `apm` binary in temporary
projects and emits the existing completion gates directly (`N` is a placeholder
for a count; the lines below are a template, not literal JSON):

```json
{"crane":"gate","name":"functional","passing":N,"total":N}
{"crane":"gate","name":"state_diff","passing":N,"total":N}
```

Crane must also run the migration benchmark test. It executes fixture-backed
Python-vs-Go benchmark workloads and emits:

```json
{"crane":"gate","name":"benchmarks","passing":N,"total":N}
```

A legacy boolean such as `{"name":"benchmarks","passed":true}` is not enough.
The benchmark report must prove that every benchmarked command produced the
expected real artifact or output evidence.

The completion criteria are command-specific:

| Command area | Required proof |
| --- | --- |
| `init` | Creates a real `apm.yml` manifest. |
| `install` | Installs a local package, writes `apm.lock.yaml`, and materializes installed content under `apm_modules/` or target paths. |
| `update` | Mutates the lockfile when a dependency changes and reports a real no-op when nothing changed. |
| `compile` | Writes target artifacts such as `.github/copilot-instructions.md` from fixture project state. |
| `pack` / `unpack` | Writes a non-empty distributable bundle and can extract it back into a temp project. |
| `run` / `preview` / `list` | Reads project scripts, executes or previews the selected script, and reflects the actual manifest contents. |
| `audit` / `policy` | Fails on planted hidden Unicode, missing lockfile state, or policy violations instead of always reporting success. |
| `mcp` / `runtime` / `plugin` / `marketplace` | Persist real manifest or config changes, not just status text. |
| `cache` | Removes cache entries while respecting the configured cache root. |
| `prune` / `uninstall` | Removes only files owned by stale dependencies and proves the removed paths are gone. |
| `deps` / `outdated` / `view` / `search` | Read lockfile, marketplace, or registry fixtures and report fixture-derived results. |
| `self-update` / `experimental` / `config` | Persist or validate real configuration state where the Python command does. |

Each new command implementation should add or extend functional, state-diff, and
benchmark fixture coverage before Crane can claim it moved the migration
forward. Shims, dry-runs, mocks, and help-only assertions do not count as command
completion.

## Cutover Trigger Conditions

Expand All @@ -27,9 +79,13 @@ are true:
`init`, `install`, `update`, `compile`, `pack`, `run`, `audit`,
`policy`, `mcp`, `runtime`, `targets`, `list`, `view`, `cache`,
`deps`, `marketplace`, `uninstall`, `prune`
3. Python-vs-Go parity tests pass for all commands in the matrix
4. `go build ./cmd/apm` produces a single static binary
5. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`)
3. `TestParityRealFunctionalAndStateDiffContracts` passes every fixture-backed
real-command scenario and emits passing `functional` and `state_diff` gates
4. Python-vs-Go parity tests pass for all commands in the matrix
5. Migration benchmarks pass real fixture-backed command workloads and emit a
passing counted `benchmarks` gate
6. `go build ./cmd/apm` produces a single static binary
7. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`)

## Cutover Steps

Expand Down
Loading
Loading