Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions docs/checkout-native-review-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,31 @@ stuffed diffs or full file bodies.

Comment thread
monit-reviewer marked this conversation as resolved.
Specialist reviewers must return structured output that includes:

- findings
- evidence and anchors
- confidence
- inspected areas
- skipped areas or declared constraints

Rollup must treat isolated reviewer failures as incomplete coverage and must not
turn a partially reviewed PR into a clean approval.
- `findings`, each with severity, changed-file path, anchor, and body
- `inspected_files`, listing assigned changed files the reviewer actually inspected
- `skipped_files`, listing assigned changed files the reviewer intentionally did not or could not inspect
- `constraints`, listing material scope, context, or tool constraints

Rollup receives compact reviewer coverage summaries derived from those fields.
`allowed_files` is intentional scope narrowing, not incomplete coverage by
itself. Isolated reviewer failures, skipped files, missing reviewer results, and
unassigned changed files are incomplete coverage and must not turn into a clean
approval silently.

Coverage uses two related scopes:

- readable files: `allowed_files` when present, otherwise all changed files in
the workbench
- assignment scope: `allowed_files` when present, otherwise `files` when the
orchestrator supplied them, otherwise all changed files

The coverage status values are:

- `complete_broad`: a reviewer without `allowed_files` covered its assignment
- `complete_constrained`: a reviewer with `allowed_files` covered that narrowed
assignment
- `incomplete_skipped`: assigned files were skipped or not reported as inspected
- `incomplete_failed`: an isolated reviewer failure or missing reviewer result
prevented coverage
- `incomplete_unassigned`: changed files were not assigned to any selected
reviewer
12 changes: 12 additions & 0 deletions internal/cmd/reviewcmd/reviewcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ func newReviewSummary(summary reviewplan.Summary) view.ReviewSummary {
Model: summary.Run.Model,
PostingIdentity: summary.Run.PostingIdentity,
SelectedReviewers: summary.Run.SelectedReviewers,
ReviewerCoverage: []view.ReviewReviewerCoverageSummary{},
WallDurationMS: summary.Run.WallDurationMS,
Workstreams: []view.ReviewWorkstream{},
},
Expand All @@ -477,6 +478,17 @@ func newReviewSummary(summary reviewplan.Summary) view.ReviewSummary {
for _, reviewer := range summary.Reviewers {
out.Reviewers = append(out.Reviewers, view.ReviewReviewerSummary{Name: reviewer.Name, Findings: reviewer.Findings})
}
for _, coverage := range summary.Run.ReviewerCoverage {
out.Run.ReviewerCoverage = append(out.Run.ReviewerCoverage, view.ReviewReviewerCoverageSummary{
AgentID: coverage.AgentID,
Status: coverage.Status,
Scope: coverage.Scope,
InspectedFiles: coverage.InspectedFiles,
SkippedFiles: coverage.SkippedFiles,
Constraints: coverage.Constraints,
Diagnostic: coverage.Diagnostic,
})
}
for _, workstream := range summary.Run.Workstreams {
out.Run.Workstreams = append(out.Run.Workstreams, view.ReviewWorkstream{
Name: workstream.Name,
Expand Down
17 changes: 15 additions & 2 deletions internal/cmd/reviewcmd/reviewcmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2314,8 +2314,14 @@ func TestNewReviewDryRunMapsPlanSummary(t *testing.T) {
Model: "sonnet",
PostingIdentity: "review-bot",
SelectedReviewers: []string{"go:tests"},
WallDurationMS: &wall,
Workstreams: []reviewplan.WorkstreamUsage{{Name: "go:tests", Model: "sonnet", TokensIn: &tokensIn}},
ReviewerCoverage: []reviewplan.ReviewerCoverageSummary{{
AgentID: "go:tests",
Status: "complete_broad",
Scope: []string{"main.go"},
InspectedFiles: []string{"main.go"},
}},
WallDurationMS: &wall,
Workstreams: []reviewplan.WorkstreamUsage{{Name: "go:tests", Model: "sonnet", TokensIn: &tokensIn}},
},
Totals: reviewplan.AggregateUsage{TokensIn: &tokensIn},
}
Expand All @@ -2342,6 +2348,13 @@ func TestNewReviewDryRunMapsPlanSummary(t *testing.T) {
run.Workstreams[0].CostUSD != nil {
t.Fatalf("summary workstreams = %#v", run.Workstreams)
}
if len(run.ReviewerCoverage) != 1 ||
run.ReviewerCoverage[0].AgentID != "go:tests" ||
run.ReviewerCoverage[0].Status != "complete_broad" ||
len(run.ReviewerCoverage[0].InspectedFiles) != 1 ||
run.ReviewerCoverage[0].InspectedFiles[0] != "main.go" {
t.Fatalf("reviewer coverage = %#v", run.ReviewerCoverage)
}
if rendered.Summary.Totals.TokensIn == nil || *rendered.Summary.Totals.TokensIn != tokensIn || rendered.Summary.Totals.CostUSD != nil {
t.Fatalf("summary totals = %#v", rendered.Summary.Totals)
}
Expand Down
2 changes: 2 additions & 0 deletions internal/llm/adapter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ func TestFakeAdapterAndRunStructured(t *testing.T) {
adapter.Queue(FakeResult{Response: Response{StructuredOutput: []byte(`{
"schema_version": 1,
"agent_id": "agent-1",
"inspected_files": ["main.go"],
"findings": [{
"severity": "major",
"file": "main.go",
Expand Down Expand Up @@ -128,6 +129,7 @@ func TestFakeAdapterAndRunStructured(t *testing.T) {
adapter.Queue(FakeResult{Response: Response{StructuredOutput: []byte(`{
"schema_version": 1,
"agent_id": "agent-1",
"inspected_files": ["main.go"],
"findings": [{
"severity": "major",
"file": "main.go",
Expand Down
98 changes: 92 additions & 6 deletions internal/llm/contracts.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ const (

// DefaultMaxResolvedThreads is the default selected thread resolution cap.
DefaultMaxResolvedThreads = 25

defaultMaxCoverageConstraints = 10
defaultMaxCoverageConstraintRunes = 300
)

// Selection is validated orchestrator selection output.
Expand Down Expand Up @@ -52,8 +55,11 @@ type SelectionOptions struct {

// Findings is validated reviewer findings output.
type Findings struct {
AgentID string
Findings []review.Finding
AgentID string
Findings []review.Finding
InspectedFiles []string
SkippedFiles []string
Constraints []string
}

// FindingIDGenerator assigns harness-owned finding IDs.
Expand Down Expand Up @@ -99,9 +105,12 @@ type threadActionWire struct {
}

type findingsWire struct {
SchemaVersion int `json:"schema_version"`
AgentID string `json:"agent_id"`
Findings []findingWire `json:"findings"`
SchemaVersion int `json:"schema_version"`
AgentID string `json:"agent_id"`
InspectedFiles []string `json:"inspected_files"`
SkippedFiles []string `json:"skipped_files,omitempty"`
Constraints []string `json:"constraints,omitempty"`
Findings []findingWire `json:"findings"`
}

type findingWire struct {
Expand Down Expand Up @@ -232,7 +241,31 @@ func DecodeFindings(data []byte, opts FindingsOptions) (Findings, error) {

seenIDs := map[review.FindingID]bool{}
severityCounts := map[review.Severity]int{}
result := Findings{AgentID: wire.AgentID}
inspected, err := decodeCoverageFiles("inspected_files", wire.InspectedFiles, opts.ChangedFiles)
if err != nil {
return Findings{}, err
}
skipped, err := decodeCoverageFiles("skipped_files", wire.SkippedFiles, opts.ChangedFiles)
if err != nil {
return Findings{}, err
}
if len(inspected) == 0 && len(skipped) == 0 {
return Findings{}, fmt.Errorf("llm: inspected_files or skipped_files must contain at least one changed file")
}
if err := validateCoverageFileDisjoint(inspected, skipped); err != nil {
return Findings{}, err
}
constraints, err := decodeCoverageStrings("constraints", wire.Constraints)
if err != nil {
return Findings{}, err
}

result := Findings{
AgentID: wire.AgentID,
InspectedFiles: inspected,
SkippedFiles: skipped,
Constraints: constraints,
}
for _, found := range wire.Findings {
if len(found.FindingID) > 0 {
return Findings{}, fmt.Errorf("llm: model-supplied finding_id is not allowed")
Expand Down Expand Up @@ -285,6 +318,59 @@ func DecodeFindings(data []byte, opts FindingsOptions) (Findings, error) {
return result, nil
}

func decodeCoverageFiles(name string, files []string, changedFiles map[string]bool) ([]string, error) {
out := make([]string, 0, len(files))
seen := map[string]bool{}
for _, file := range files {
file = strings.TrimSpace(file)
if file == "" || !changedFiles[file] {
return nil, fmt.Errorf("llm: %s entry %q is not in changed files", name, file)
}
if seen[file] {
return nil, fmt.Errorf("llm: duplicate %s entry %q", name, file)
}
seen[file] = true
out = append(out, file)
}
return out, nil
}

func decodeCoverageStrings(name string, values []string) ([]string, error) {
if len(values) > defaultMaxCoverageConstraints {
return nil, fmt.Errorf("llm: %s cap exceeded", name)
}
out := make([]string, 0, len(values))
seen := map[string]bool{}
for _, value := range values {
value = sanitize(value)
if strings.TrimSpace(value) == "" {
return nil, fmt.Errorf("llm: %s entries must be non-empty", name)
}
if utf8.RuneCountInString(value) > defaultMaxCoverageConstraintRunes {
return nil, fmt.Errorf("llm: %s entry length out of bounds", name)
}
if seen[value] {
return nil, fmt.Errorf("llm: duplicate %s entry %q", name, value)
}
seen[value] = true
out = append(out, value)
}
return out, nil
}

func validateCoverageFileDisjoint(inspected, skipped []string) error {
seen := map[string]bool{}
for _, file := range inspected {
seen[file] = true
}
for _, file := range skipped {
if seen[file] {
return fmt.Errorf("llm: coverage file %q cannot be both inspected and skipped", file)
}
}
return nil
}

func (wire findingWire) normalizedFilePath() (string, error) {
filePath, hasFilePath, err := decodeOptionalStringField("file_path", wire.FilePath)
if err != nil {
Expand Down
Loading
Loading