open-cli-collective · rianjs · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/README.md b/README.md
@@ -690,6 +690,15 @@ All commands accept the global flags:
 |------|-----------|
 | `--profile <name>` | Select a configured profile and bypass repo-aware routing. When omitted, PR-aware commands use `repository_profiles`; commands that cannot resolve a route require `--profile`. During `init`, an omitted profile starts with the suggested name `default`. |
 | `--backend <name>` | Compatibility/runtime backend selector. It cannot override an explicit credential store destination. |
+| `--quiet` | Suppress structured progress logs on stderr. Command stdout and stderr error reporting stay unchanged. |
+
+Progress-bearing commands emit one-line structured events on stderr in the
+form `cr progress event=<start|finish|error> command="..." op="..." target="..." ...`.
+This output is intended for human progress visibility and simple substring
+parsers. JSON and text results continue to write to stdout only. In this slice,
+structured progress is enabled for `benchmark run`, `benchmark select`,
+`benchmark compare`, `data prune`, `data purge`, `config clear`, and
+`sessions delete`.
 
 ### `cr`
 
@@ -899,6 +908,11 @@ reset, `config.yml` is removed.
 `config clear` never touches durable review data. Use `cr data purge` for the
 data pillar.
 
+When progress logging is enabled, `config clear` brackets config load, profile
+resolution, credential-store open, credential deletion, optional profile-config
+removal, and optional cache cleanup on stderr. Partial-success failures still
+render their JSON or text result on stdout before returning the final error.
+
 ### `cr me`
 
 ```text
@@ -1076,6 +1090,12 @@ reviewed by the dry-run child command. Optional `stages.synthesis` metadata is
 reserved for future benchmark support and does not change `benchmark run`
 execution yet.
 
+`benchmark run` emits progress on stderr for suite load and selection, results
+directory creation, each matrix run, child `cr review` subprocess execution,
+suite artifact writes, and comparison artifact generation. Child review
+subprocesses are invoked with `--quiet` so their internal progress does not
+pollute captured benchmark stderr artifacts.
+
 ### `cr benchmark select`
 
 ```text
@@ -1106,6 +1126,10 @@ When one selector run fails after partial execution, `benchmark select` records
 that run as failed and continues the remaining matrix when it can still write
 trustworthy suite artifacts.
 
+`benchmark select` emits progress on stderr for suite load and selection,
+results directory creation, per-profile runtime open, each selector run,
+selection pipeline execution, and artifact generation.
+
 ### `cr benchmark compare`
 
 ```text
@@ -1118,6 +1142,9 @@ does not invoke models, read live PR state, mutate Git provider state, or
 require provider credentials. `cr benchmark run` and `cr benchmark select`
 write the same comparison artifacts automatically.
 
+`benchmark compare` emits progress on stderr while it loads the suite summary,
+builds the comparison model, and writes JSON and Markdown artifacts.
+
 ### `cr sessions list`
 
 ```text
@@ -1146,6 +1173,9 @@ cr sessions delete <name> [--json]
 Deletes one named LLM session row. It does not delete provider-side session
 state. Missing sessions return an error.
 
+`sessions delete` emits progress on stderr for layout resolution, legacy
+migration, ledger open, and session deletion.
+
 ### `cr data show`
 
 ```text
@@ -1180,6 +1210,10 @@ Prune deletes the ledger row first, then removes artifact directories best
 effort. Unsafe artifact paths and remove failures are reported as warnings after
 the ledger row is deleted.
 
+When progress logging is enabled, `data prune` emits stderr brackets for layout
+resolution, optional legacy migration, ledger open, run selection, delete
+actions, orphan discovery and removal, and the dry-run legacy check.
+
 ### `cr data purge`
 
 ```text
@@ -1191,6 +1225,9 @@ Purges the whole local data root. `--yes` is required unless `--dry-run` is set.
 Purge does not open the ledger database, so it can remove a corrupt local data
 root. `--json` emits the data root, dry-run status, and removed status.
 
+`data purge` emits progress on stderr for layout resolution and the purge
+action itself.
+
 Flags:
 
 | Flag | Semantics |

diff --git a/docs/development.md b/docs/development.md
@@ -26,6 +26,25 @@ Structured LLM calls in the review pipeline are durable per-task units. See
 `docs/llm-task-artifacts.md` for the artifact schema, status taxonomy, and
 resume invariants.
 
+## Progress Logging
+
+Non-review CLI progress now flows through one reusable component in
+`internal/progress`. The contract for this slice is:
+
+- progress writes to stderr only
+- JSON and text result payloads stay on stdout
+- `--quiet` suppresses progress only
+- lines are single-line structured records:
+  `cr progress event=<start|finish|error> command="..." op="..." target="..." ...`
+
+Current command coverage is `benchmark run`, `benchmark select`,
+`benchmark compare`, `data prune`, `data purge`, `config clear`, and
+`sessions delete`.
+
+Command packages own the `command=` label and stderr sink. Lower-level reuse is
+intentionally narrow: `internal/datalifecycle` accepts a tiny start/end
+progress interface rather than importing Cobra or root options.
+
 ## Quick Commands
 
 ```bash

diff --git a/internal/cmd/benchmarkcmd/benchmarkcmd_test.go b/internal/cmd/benchmarkcmd/benchmarkcmd_test.go
@@ -125,6 +125,99 @@ func TestDoctorJSONUsesDefaultExecutable(t *testing.T) {
 	}
 }
 
+func TestBenchmarkCompareProgressWritesToStderr(t *testing.T) {
+	resultsDir := t.TempDir()
+	summary := benchmarkSuiteSummary{
+		SchemaVersion: benchmarkArtifactSchemaVersion,
+		Mode:          benchmarkModeReview,
+		SuiteID:       "suite1",
+		ResultsDir:    resultsDir,
+		SelectedCandidates: []benchmarkCandidate{{
+			ID:      "first",
+			Profile: "home",
+			Stages: benchmarkCandidateStages{Selection: benchmarkSelectionStage{Model: "kimi"}},
+		}},
+		SelectedCases: []benchmarkCase{{ID: "case_one", PR: "https://github.com/open-cli-collective/codereview-cli/pull/1"}},
+		Runs: []benchmarkRun{
+			matrixFixtureRun(resultsDir, "0001-c01-k01-first-case_one", "first", "case_one", 0, failureNone, 1, map[string]int{"major": 1}, 10),
+		},
+		Artifacts: suiteArtifacts{
+			Manifest:           filepath.Join(resultsDir, "manifest.json"),
+			SummaryJSONL:       filepath.Join(resultsDir, "summary.jsonl"),
+			SuiteSummary:       filepath.Join(resultsDir, "suite-summary.json"),
+			Report:             filepath.Join(resultsDir, "report.md"),
+			ComparisonJSON:     filepath.Join(resultsDir, "comparison.json"),
+			ComparisonMarkdown: filepath.Join(resultsDir, "comparison.md"),
+		},
+	}
+	writeComparisonFixture(t, summary)
+	writeReviewJSON(t, summary.Runs[0].Artifacts.ReviewJSON, view.ReviewDryRun{Run: view.ReviewRun{RunID: "child-run-1"}})
+
+	cfgPath := filepath.Join(t.TempDir(), "config.yml")
+	if err := config.Save(cfgPath, testConfig()); err != nil {
+		t.Fatalf("config Save: %v", err)
+	}
+	var stdout bytes.Buffer
+	var stderr bytes.Buffer
+	cmd, opts := root.NewCommandWithOptions(&root.Options{
+		ConfigPath: cfgPath,
+		Stdout:     &stdout,
+		Stderr:     &stderr,
+		Quiet:      false,
+	})
+	Register(cmd, opts)
+
+	if err := root.Execute(cmd, []string{"benchmark", "compare", resultsDir, "--json"}); err != nil {
+		t.Fatalf("Execute: %v", err)
+	}
+	if !strings.Contains(stderr.String(), `command="benchmark.compare" op="load_summary"`) {
+		t.Fatalf("stderr = %q, want load_summary progress", stderr.String())
+	}
+	if !strings.Contains(stderr.String(), `command="benchmark.compare" op="write_markdown"`) {
+		t.Fatalf("stderr = %q, want write_markdown progress", stderr.String())
+	}
+	var got comparisonReport
+	if err := json.Unmarshal(stdout.Bytes(), &got); err != nil {
+		t.Fatalf("Unmarshal JSON: %v\nstdout=%s\nstderr=%s", err, stdout.String(), stderr.String())
+	}
+}
+
+func TestRunProgressWritesToStderr(t *testing.T) {
+	cmd, out, errOut := newTestCommandWithStderr(t, false)
+	suitePath := writeBenchmarkSuite(t, validBenchmarkSuite(t))
+	crBin := writeExecutableCRBin(t)
+	resultsDir := filepath.Join(t.TempDir(), "results")
+	withBenchmarkRunSeams(t, fixedBenchmarkTime(), func(_ context.Context, _ string, _ []string) reviewCommandResult {
+		return reviewCommandResult{
+			Stdout:   reviewDryRunJSON(t, "child-run-1"),
+			Stderr:   []byte("child stderr\n"),
+			ExitCode: 0,
+			Duration: time.Second,
+		}
+	})
+
+	if err := root.Execute(cmd, []string{
+		"benchmark", "run", suitePath,
+		"--candidate", "first",
+		"--case", "case_one",
+		"--results-dir", resultsDir,
+		"--cr-bin", crBin,
+		"--json",
+	}); err != nil {
+		t.Fatalf("Execute: %v", err)
+	}
+	if !strings.Contains(errOut.String(), `command="benchmark.run" op="execute_run"`) {
+		t.Fatalf("stderr = %q, want execute_run progress", errOut.String())
+	}
+	if !strings.Contains(errOut.String(), `command="benchmark.run" op="write_comparison"`) {
+		t.Fatalf("stderr = %q, want write_comparison progress", errOut.String())
+	}
+	var got benchmarkSuiteSummary
+	if err := json.Unmarshal(out.Bytes(), &got); err != nil {
+		t.Fatalf("Unmarshal JSON: %v\nstdout=%s\nstderr=%s", err, out.String(), errOut.String())
+	}
+}
+
 func TestDoctorJSONReportsOptionalSynthesisStage(t *testing.T) {
 	cmd, out := newTestCommand(t)
 	synthesisPrompt := filepath.Join(t.TempDir(), "synthesis-v1.md")
@@ -424,6 +517,7 @@ func TestRunExecutesSelectedMatrixAndWritesArtifacts(t *testing.T) {
 	}
 	wantFirstArgs := []string{
 		"--profile", "home",
+		"--quiet",
 		"review", "https://github.com/open-cli-collective/codereview-cli/pull/1",
 		"--dry-run", "--json",
 		"--selection-model", "claude-sonnet-4-6",
@@ -440,6 +534,7 @@ func TestRunExecutesSelectedMatrixAndWritesArtifacts(t *testing.T) {
 	}
 	wantSecondArgs := []string{
 		"--profile", "home",
+		"--quiet",
 		"review", "https://github.com/open-cli-collective/codereview-cli/pull/2",
 		"--dry-run", "--json",
 		"--review-base-sha", "1111111",
@@ -461,6 +556,7 @@ func TestRunExecutesSelectedMatrixAndWritesArtifacts(t *testing.T) {
 	}
 	wantThirdArgs := []string{
 		"--profile", "home",
+		"--quiet",
 		"review", "https://github.com/open-cli-collective/codereview-cli/pull/1",
 		"--dry-run", "--json",
 		"--selection-model", "kimi",
@@ -475,6 +571,7 @@ func TestRunExecutesSelectedMatrixAndWritesArtifacts(t *testing.T) {
 	}
 	wantFourthArgs := []string{
 		"--profile", "home",
+		"--quiet",
 		"review", "https://github.com/open-cli-collective/codereview-cli/pull/2",
 		"--dry-run", "--json",
 		"--review-base-sha", "1111111",
@@ -935,19 +1032,26 @@ func TestRunReusesExplicitResultsDirAndOverwritesOwnedArtifacts(t *testing.T) {
 }
 
 func newTestCommand(t *testing.T) (*cobra.Command, *bytes.Buffer) {
+	cmd, out, _ := newTestCommandWithStderr(t, true)
+	return cmd, out
+}
+
+func newTestCommandWithStderr(t *testing.T, quiet bool) (*cobra.Command, *bytes.Buffer, *bytes.Buffer) {
 	t.Helper()
 	cfgPath := filepath.Join(t.TempDir(), "config.yml")
 	if err := config.Save(cfgPath, testConfig()); err != nil {
 		t.Fatalf("config Save: %v", err)
 	}
 	var out bytes.Buffer
+	var errOut bytes.Buffer
 	cmd, opts := root.NewCommandWithOptions(&root.Options{
 		ConfigPath: cfgPath,
 		Stdout:     &out,
-		Stderr:     &bytes.Buffer{},
+		Stderr:     &errOut,
+		Quiet:      quiet,
 	})
 	Register(cmd, opts)
-	return cmd, &out
+	return cmd, &out, &errOut
 }
 
 func writeBenchmarkSuite(t *testing.T, body string) string {

diff --git a/internal/cmd/benchmarkcmd/compare.go b/internal/cmd/benchmarkcmd/compare.go
@@ -13,6 +13,7 @@ import (
 	"github.com/open-cli-collective/codereview-cli/internal/benchmark"
 	"github.com/open-cli-collective/codereview-cli/internal/cmd/exitcode"
 	"github.com/open-cli-collective/codereview-cli/internal/cmd/root"
+	"github.com/open-cli-collective/codereview-cli/internal/progress"
 	"github.com/open-cli-collective/codereview-cli/internal/view"
 )
 
@@ -183,7 +184,8 @@ func newCompareCommand(opts *root.Options) *cobra.Command {
 			return nil
 		},
 		RunE: func(_ *cobra.Command, args []string) error {
-			comparison, err := writeComparisonArtifactsForResultsDir(args[0])
+			logger := newProgressLogger(opts)
+			comparison, err := writeComparisonArtifactsForResultsDirWithProgress(args[0], logger)
 			if err != nil {
 				return mapBenchmarkError(err)
 			}
@@ -200,17 +202,29 @@ func newCompareCommand(opts *root.Options) *cobra.Command {
 }
 
 func writeComparisonArtifactsForResultsDir(resultsDir string) (comparisonReport, error) {
+	return writeComparisonArtifactsForResultsDirWithProgress(resultsDir, progress.New(nil, true, nil))
+}
+
+func writeComparisonArtifactsForResultsDirWithProgress(resultsDir string, logger *progress.Logger) (comparisonReport, error) {
+	resolveSpan := logger.Start("benchmark.compare", "load_summary", "results")
 	summary, resolvedDir, err := loadBenchmarkSummaryForCompare(resultsDir)
 	if err != nil {
-		return comparisonReport{}, err
+		return comparisonReport{}, endProgressSpan(resolveSpan, err)
 	}
+	resolveSpan.End(nil)
+	buildSpan := logger.Start("benchmark.compare", "build_comparison", summary.SuiteID)
 	comparison := buildComparison(summary, resolvedDir)
+	buildSpan.End(nil)
+	jsonSpan := logger.Start("benchmark.compare", "write_json", summary.SuiteID)
 	if err := writeJSONFile(comparison.Artifacts.ComparisonJSON, comparison); err != nil {
-		return comparisonReport{}, err
+		return comparisonReport{}, endProgressSpan(jsonSpan, err)
 	}
+	jsonSpan.End(nil)
+	mdSpan := logger.Start("benchmark.compare", "write_markdown", summary.SuiteID)
 	if err := writeArtifactFile(comparison.Artifacts.ComparisonMarkdown, []byte(renderComparisonMarkdown(comparison))); err != nil {
-		return comparisonReport{}, err
+		return comparisonReport{}, endProgressSpan(mdSpan, err)
 	}
+	mdSpan.End(nil)
 	return comparison, nil
 }
 

diff --git a/internal/cmd/benchmarkcmd/progress.go b/internal/cmd/benchmarkcmd/progress.go
@@ -0,0 +1,20 @@
+package benchmarkcmd
+
+import (
+	"github.com/open-cli-collective/codereview-cli/internal/cmd/root"
+	"github.com/open-cli-collective/codereview-cli/internal/progress"
+)
+
+func newProgressLogger(opts *root.Options) *progress.Logger {
+	if opts == nil {
+		return progress.New(nil, true, nil)
+	}
+	return progress.New(opts.Stderr, opts.Quiet, nil)
+}
+
+func endProgressSpan(span *progress.Span, err error) error {
+	if span != nil {
+		span.End(err)
+	}
+	return err
+}