diff --git a/docs/checkout-native-review-contract.md b/docs/checkout-native-review-contract.md index e6ee9b3..e011c30 100644 --- a/docs/checkout-native-review-contract.md +++ b/docs/checkout-native-review-contract.md @@ -240,11 +240,31 @@ stuffed diffs or full file bodies. Specialist reviewers must return structured output that includes: -- findings -- evidence and anchors -- confidence -- inspected areas -- skipped areas or declared constraints - -Rollup must treat isolated reviewer failures as incomplete coverage and must not -turn a partially reviewed PR into a clean approval. +- `findings`, each with severity, changed-file path, anchor, and body +- `inspected_files`, listing assigned changed files the reviewer actually inspected +- `skipped_files`, listing assigned changed files the reviewer intentionally did not or could not inspect +- `constraints`, listing material scope, context, or tool constraints + +Rollup receives compact reviewer coverage summaries derived from those fields. +`allowed_files` is intentional scope narrowing, not incomplete coverage by +itself. Isolated reviewer failures, skipped files, missing reviewer results, and +unassigned changed files are incomplete coverage and must not turn into a clean +approval silently. + +Coverage uses two related scopes: + +- readable files: `allowed_files` when present, otherwise all changed files in + the workbench +- assignment scope: `allowed_files` when present, otherwise `files` when the + orchestrator supplied them, otherwise all changed files + +The coverage status values are: + +- `complete_broad`: a reviewer without `allowed_files` covered its assignment +- `complete_constrained`: a reviewer with `allowed_files` covered that narrowed + assignment +- `incomplete_skipped`: assigned files were skipped or not reported as inspected +- `incomplete_failed`: an isolated reviewer failure or missing reviewer result + prevented coverage +- `incomplete_unassigned`: changed files were not assigned to any selected + reviewer diff --git a/internal/cmd/reviewcmd/reviewcmd.go b/internal/cmd/reviewcmd/reviewcmd.go index 9f8d96c..38cbb12 100644 --- a/internal/cmd/reviewcmd/reviewcmd.go +++ b/internal/cmd/reviewcmd/reviewcmd.go @@ -462,6 +462,7 @@ func newReviewSummary(summary reviewplan.Summary) view.ReviewSummary { Model: summary.Run.Model, PostingIdentity: summary.Run.PostingIdentity, SelectedReviewers: summary.Run.SelectedReviewers, + ReviewerCoverage: []view.ReviewReviewerCoverageSummary{}, WallDurationMS: summary.Run.WallDurationMS, Workstreams: []view.ReviewWorkstream{}, }, @@ -477,6 +478,17 @@ func newReviewSummary(summary reviewplan.Summary) view.ReviewSummary { for _, reviewer := range summary.Reviewers { out.Reviewers = append(out.Reviewers, view.ReviewReviewerSummary{Name: reviewer.Name, Findings: reviewer.Findings}) } + for _, coverage := range summary.Run.ReviewerCoverage { + out.Run.ReviewerCoverage = append(out.Run.ReviewerCoverage, view.ReviewReviewerCoverageSummary{ + AgentID: coverage.AgentID, + Status: coverage.Status, + Scope: coverage.Scope, + InspectedFiles: coverage.InspectedFiles, + SkippedFiles: coverage.SkippedFiles, + Constraints: coverage.Constraints, + Diagnostic: coverage.Diagnostic, + }) + } for _, workstream := range summary.Run.Workstreams { out.Run.Workstreams = append(out.Run.Workstreams, view.ReviewWorkstream{ Name: workstream.Name, diff --git a/internal/cmd/reviewcmd/reviewcmd_test.go b/internal/cmd/reviewcmd/reviewcmd_test.go index b8fbb24..0a80c5d 100644 --- a/internal/cmd/reviewcmd/reviewcmd_test.go +++ b/internal/cmd/reviewcmd/reviewcmd_test.go @@ -2314,8 +2314,14 @@ func TestNewReviewDryRunMapsPlanSummary(t *testing.T) { Model: "sonnet", PostingIdentity: "review-bot", SelectedReviewers: []string{"go:tests"}, - WallDurationMS: &wall, - Workstreams: []reviewplan.WorkstreamUsage{{Name: "go:tests", Model: "sonnet", TokensIn: &tokensIn}}, + ReviewerCoverage: []reviewplan.ReviewerCoverageSummary{{ + AgentID: "go:tests", + Status: "complete_broad", + Scope: []string{"main.go"}, + InspectedFiles: []string{"main.go"}, + }}, + WallDurationMS: &wall, + Workstreams: []reviewplan.WorkstreamUsage{{Name: "go:tests", Model: "sonnet", TokensIn: &tokensIn}}, }, Totals: reviewplan.AggregateUsage{TokensIn: &tokensIn}, } @@ -2342,6 +2348,13 @@ func TestNewReviewDryRunMapsPlanSummary(t *testing.T) { run.Workstreams[0].CostUSD != nil { t.Fatalf("summary workstreams = %#v", run.Workstreams) } + if len(run.ReviewerCoverage) != 1 || + run.ReviewerCoverage[0].AgentID != "go:tests" || + run.ReviewerCoverage[0].Status != "complete_broad" || + len(run.ReviewerCoverage[0].InspectedFiles) != 1 || + run.ReviewerCoverage[0].InspectedFiles[0] != "main.go" { + t.Fatalf("reviewer coverage = %#v", run.ReviewerCoverage) + } if rendered.Summary.Totals.TokensIn == nil || *rendered.Summary.Totals.TokensIn != tokensIn || rendered.Summary.Totals.CostUSD != nil { t.Fatalf("summary totals = %#v", rendered.Summary.Totals) } diff --git a/internal/llm/adapter_test.go b/internal/llm/adapter_test.go index aa7d89a..8b8133e 100644 --- a/internal/llm/adapter_test.go +++ b/internal/llm/adapter_test.go @@ -94,6 +94,7 @@ func TestFakeAdapterAndRunStructured(t *testing.T) { adapter.Queue(FakeResult{Response: Response{StructuredOutput: []byte(`{ "schema_version": 1, "agent_id": "agent-1", + "inspected_files": ["main.go"], "findings": [{ "severity": "major", "file": "main.go", @@ -128,6 +129,7 @@ func TestFakeAdapterAndRunStructured(t *testing.T) { adapter.Queue(FakeResult{Response: Response{StructuredOutput: []byte(`{ "schema_version": 1, "agent_id": "agent-1", + "inspected_files": ["main.go"], "findings": [{ "severity": "major", "file": "main.go", diff --git a/internal/llm/contracts.go b/internal/llm/contracts.go index eb03d57..62c4211 100644 --- a/internal/llm/contracts.go +++ b/internal/llm/contracts.go @@ -24,6 +24,9 @@ const ( // DefaultMaxResolvedThreads is the default selected thread resolution cap. DefaultMaxResolvedThreads = 25 + + defaultMaxCoverageConstraints = 10 + defaultMaxCoverageConstraintRunes = 300 ) // Selection is validated orchestrator selection output. @@ -52,8 +55,11 @@ type SelectionOptions struct { // Findings is validated reviewer findings output. type Findings struct { - AgentID string - Findings []review.Finding + AgentID string + Findings []review.Finding + InspectedFiles []string + SkippedFiles []string + Constraints []string } // FindingIDGenerator assigns harness-owned finding IDs. @@ -99,9 +105,12 @@ type threadActionWire struct { } type findingsWire struct { - SchemaVersion int `json:"schema_version"` - AgentID string `json:"agent_id"` - Findings []findingWire `json:"findings"` + SchemaVersion int `json:"schema_version"` + AgentID string `json:"agent_id"` + InspectedFiles []string `json:"inspected_files"` + SkippedFiles []string `json:"skipped_files,omitempty"` + Constraints []string `json:"constraints,omitempty"` + Findings []findingWire `json:"findings"` } type findingWire struct { @@ -232,7 +241,31 @@ func DecodeFindings(data []byte, opts FindingsOptions) (Findings, error) { seenIDs := map[review.FindingID]bool{} severityCounts := map[review.Severity]int{} - result := Findings{AgentID: wire.AgentID} + inspected, err := decodeCoverageFiles("inspected_files", wire.InspectedFiles, opts.ChangedFiles) + if err != nil { + return Findings{}, err + } + skipped, err := decodeCoverageFiles("skipped_files", wire.SkippedFiles, opts.ChangedFiles) + if err != nil { + return Findings{}, err + } + if len(inspected) == 0 && len(skipped) == 0 { + return Findings{}, fmt.Errorf("llm: inspected_files or skipped_files must contain at least one changed file") + } + if err := validateCoverageFileDisjoint(inspected, skipped); err != nil { + return Findings{}, err + } + constraints, err := decodeCoverageStrings("constraints", wire.Constraints) + if err != nil { + return Findings{}, err + } + + result := Findings{ + AgentID: wire.AgentID, + InspectedFiles: inspected, + SkippedFiles: skipped, + Constraints: constraints, + } for _, found := range wire.Findings { if len(found.FindingID) > 0 { return Findings{}, fmt.Errorf("llm: model-supplied finding_id is not allowed") @@ -285,6 +318,59 @@ func DecodeFindings(data []byte, opts FindingsOptions) (Findings, error) { return result, nil } +func decodeCoverageFiles(name string, files []string, changedFiles map[string]bool) ([]string, error) { + out := make([]string, 0, len(files)) + seen := map[string]bool{} + for _, file := range files { + file = strings.TrimSpace(file) + if file == "" || !changedFiles[file] { + return nil, fmt.Errorf("llm: %s entry %q is not in changed files", name, file) + } + if seen[file] { + return nil, fmt.Errorf("llm: duplicate %s entry %q", name, file) + } + seen[file] = true + out = append(out, file) + } + return out, nil +} + +func decodeCoverageStrings(name string, values []string) ([]string, error) { + if len(values) > defaultMaxCoverageConstraints { + return nil, fmt.Errorf("llm: %s cap exceeded", name) + } + out := make([]string, 0, len(values)) + seen := map[string]bool{} + for _, value := range values { + value = sanitize(value) + if strings.TrimSpace(value) == "" { + return nil, fmt.Errorf("llm: %s entries must be non-empty", name) + } + if utf8.RuneCountInString(value) > defaultMaxCoverageConstraintRunes { + return nil, fmt.Errorf("llm: %s entry length out of bounds", name) + } + if seen[value] { + return nil, fmt.Errorf("llm: duplicate %s entry %q", name, value) + } + seen[value] = true + out = append(out, value) + } + return out, nil +} + +func validateCoverageFileDisjoint(inspected, skipped []string) error { + seen := map[string]bool{} + for _, file := range inspected { + seen[file] = true + } + for _, file := range skipped { + if seen[file] { + return fmt.Errorf("llm: coverage file %q cannot be both inspected and skipped", file) + } + } + return nil +} + func (wire findingWire) normalizedFilePath() (string, error) { filePath, hasFilePath, err := decodeOptionalStringField("file_path", wire.FilePath) if err != nil { diff --git a/internal/llm/contracts_test.go b/internal/llm/contracts_test.go index a3a0bca..8bfcfaf 100644 --- a/internal/llm/contracts_test.go +++ b/internal/llm/contracts_test.go @@ -57,6 +57,9 @@ func TestDecodeFindings(t *testing.T) { got, err := DecodeFindings([]byte(`{ "schema_version": 1, "agent_id": "agent-1", + "inspected_files": ["main.go"], + "skipped_files": [], + "constraints": ["scope "], "findings": [{ "severity":"major", "file_path":"main.go", @@ -70,6 +73,12 @@ func TestDecodeFindings(t *testing.T) { if got.AgentID != "agent-1" || got.Findings[0].ID != "f-1" || got.Findings[0].Severity != review.SeverityMajor { t.Fatalf("DecodeFindings = %#v", got) } + if len(got.InspectedFiles) != 1 || got.InspectedFiles[0] != "main.go" { + t.Fatalf("inspected files = %#v, want main.go", got.InspectedFiles) + } + if len(got.Constraints) != 1 || strings.Contains(got.Constraints[0], "