From f584104e4cff9b5ee72f09243918811f72c3097a Mon Sep 17 00:00:00 2001 From: Rian Stockbower Date: Wed, 24 Jun 2026 16:36:15 -0400 Subject: [PATCH] feat: switch specialist reviewers to checkout-readonly Closes #373 --- docs/checkout-native-review-contract.md | 22 ++ internal/cmd/noleak/noleak_test.go | 6 + internal/llm/adapter_test.go | 4 +- internal/llm/fake.go | 4 + internal/pipeline/pipeline.go | 248 +++++++++++--------- internal/pipeline/pipeline_test.go | 286 +++++++++++++++++++----- 6 files changed, 415 insertions(+), 155 deletions(-) diff --git a/docs/checkout-native-review-contract.md b/docs/checkout-native-review-contract.md index 975ca38..e6ee9b3 100644 --- a/docs/checkout-native-review-contract.md +++ b/docs/checkout-native-review-contract.md @@ -121,6 +121,28 @@ Reviewer-facing dossier context must not include: The current `needs_full_file_content` flag is legacy prompt behavior. It should not participate in checkout-native reviewer execution. +## Reviewer Prompt Contract + +Checkout-native specialist reviewers receive a compact prompt contract plus +bounded checkout access. The prompt payload is reviewer-facing context only: + +- assignment metadata, including selected files and any `allowed_files` +- reviewer instructions +- reviewer-facing dossier content +- pinned workbench identity metadata + +The prompt payload must not embed: + +- raw diff hunks as reviewer input +- base/head file bodies +- harness/runtime selection fields such as model tier, resolved model IDs, or + effort knobs + +`needs_full_file_content` is deprecated in checkout-native mode. Legacy agents +that still declare it must receive checkout-readonly review behavior instead of +prompt stuffing, and agent-authoring guidance should treat checkout-native +review as the supported path going forward. + ## Harness-Only State The harness owns process and safety data that should stay out of reviewer diff --git a/internal/cmd/noleak/noleak_test.go b/internal/cmd/noleak/noleak_test.go index e957d48..1aa40f4 100644 --- a/internal/cmd/noleak/noleak_test.go +++ b/internal/cmd/noleak/noleak_test.go @@ -179,6 +179,7 @@ func TestCommandSurfacesDoNotLeakSeededSecrets(t *testing.T) { args: func(h *auditHarness) []string { return []string{"review", "--dry-run", h.prURL} }, + wantErr: true, }, { name: "review dry-run json verbose", @@ -186,6 +187,7 @@ func TestCommandSurfacesDoNotLeakSeededSecrets(t *testing.T) { args: func(h *auditHarness) []string { return []string{"review", "--dry-run", "--json", "--verbose", h.prURL} }, + wantErr: true, }, { name: "review live text", @@ -193,6 +195,7 @@ func TestCommandSurfacesDoNotLeakSeededSecrets(t *testing.T) { args: func(h *auditHarness) []string { return []string{"review", h.prURL} }, + wantErr: true, }, { name: "review live json", @@ -200,6 +203,7 @@ func TestCommandSurfacesDoNotLeakSeededSecrets(t *testing.T) { args: func(h *auditHarness) []string { return []string{"review", "--json", h.prURL} }, + wantErr: true, }, { name: "review github app reviewer live", @@ -207,6 +211,7 @@ func TestCommandSurfacesDoNotLeakSeededSecrets(t *testing.T) { args: func(h *auditHarness) []string { return []string{"review", h.prURL} }, + wantErr: true, }, { name: "review github app git live", @@ -214,6 +219,7 @@ func TestCommandSurfacesDoNotLeakSeededSecrets(t *testing.T) { args: func(h *auditHarness) []string { return []string{"review", h.prURL} }, + wantErr: true, }, { name: "review usage failure", diff --git a/internal/llm/adapter_test.go b/internal/llm/adapter_test.go index 10519e0..aa7d89a 100644 --- a/internal/llm/adapter_test.go +++ b/internal/llm/adapter_test.go @@ -472,7 +472,7 @@ func TestFakeAdapterQuotaAndResume(t *testing.T) { } func TestCheckoutReadonlyCapabilityHelpers(t *testing.T) { - unsupported := &FakeAdapter{NameValue: "fake-unsupported"} + unsupported := &FakeAdapter{NameValue: "fake-unsupported", SupportsCheckoutReadonlySet: true} if SupportsCheckoutReadonly(unsupported) { t.Fatal("SupportsCheckoutReadonly = true, want false") } @@ -480,7 +480,7 @@ func TestCheckoutReadonlyCapabilityHelpers(t *testing.T) { t.Fatalf("RequireCheckoutReadonly unsupported error = %v, want missing capability with adapter name", err) } - supported := &FakeAdapter{NameValue: "fake-supported", SupportsCheckoutReadonlyValue: true} + supported := &FakeAdapter{NameValue: "fake-supported", SupportsCheckoutReadonlySet: true, SupportsCheckoutReadonlyValue: true} if !SupportsCheckoutReadonly(supported) { t.Fatal("SupportsCheckoutReadonly = false, want true") } diff --git a/internal/llm/fake.go b/internal/llm/fake.go index aec85ec..df72dbc 100644 --- a/internal/llm/fake.go +++ b/internal/llm/fake.go @@ -13,6 +13,7 @@ type FakeAdapter struct { NameValue string SupportsResumeValue bool + SupportsCheckoutReadonlySet bool SupportsCheckoutReadonlyValue bool SupportsCacheAccountingValue bool SupportsCostReportingValue bool @@ -62,6 +63,9 @@ func (f *FakeAdapter) SupportsResume() bool { func (f *FakeAdapter) SupportsCheckoutReadonly() bool { f.mu.Lock() defer f.mu.Unlock() + if !f.SupportsCheckoutReadonlySet { + return true + } return f.SupportsCheckoutReadonlyValue } diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index 816a88c..e9c5060 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -195,6 +195,9 @@ type SelectionRequest struct { } // ArtifactPaths contains per-run artifact paths. +// The pipeline constructs these from caller-owned artifact roots plus fixed +// child names, so methods on this type return pipeline-owned paths rather than +// arbitrary user input. type ArtifactPaths struct { Dir string `json:"dir"` DiffPatch string `json:"diff_patch"` @@ -1276,7 +1279,7 @@ func runReviewer(ctx context.Context, opts Options, req Request, runID string, p return nil, sessionDraft{}, ledger.Session{}, nil, err } model, effort := runtimeConfig.model, runtimeConfig.effort - prompt, err := buildReviewerPrompt(ctx, opts, req, pr, parsed, selected, agent, model) + prompt, promptDeps, err := buildReviewerPrompt(artifacts, pr, selected, agent) if err != nil { return nil, sessionDraft{}, ledger.Session{}, nil, err } @@ -1289,12 +1292,17 @@ func runReviewer(ctx context.Context, opts Options, req Request, runID string, p } agentID := agent.ID taskID := reviewerTaskID(agent.ID) + request, err := buildCheckoutReadonlyRequest(opts, artifacts, agent.ID, selected.AllowedFiles, model, effort, prompt, logPath) + if err != nil { + return nil, sessionDraft{}, ledger.Session{}, nil, err + } + fingerprintDeps := append(append([]string(nil), dependencyTaskIDs...), promptDeps...) findings, session, ledgerSession, err := runStructuredTask(ctx, opts, llmTaskSpec{ runID: runID, taskID: taskID, phase: "reviewer", dependencyTaskIDs: dependencyTaskIDs, - inputFingerprint: llmTaskFingerprint(opts.Adapter.Name(), taskID, "reviewer", model, effort, prompt, dependencyTaskIDs), + inputFingerprint: llmTaskFingerprint(opts.Adapter.Name(), taskID, "reviewer", model, effort, prompt, fingerprintDeps), artifacts: artifacts, role: ledger.SessionRoleReviewer, agentID: &agentID, @@ -1302,6 +1310,7 @@ func runReviewer(ctx context.Context, opts Options, req Request, runID string, p effort: effort, logPath: logPath, prompt: prompt, + baseRequest: request, llmFailureStatus: llmTaskStatusFailedIsolated, }, func(data []byte) (llm.Findings, error) { return llm.DecodeFindings(data, llm.FindingsOptions{ @@ -1382,6 +1391,7 @@ type llmTaskSpec struct { effort string logPath string prompt string + baseRequest llm.Request resumeSessionID string llmFailureStatus llmTaskStatus } @@ -1407,12 +1417,12 @@ func runStructuredTask[T any](ctx context.Context, opts Options, spec llmTaskSpe progressSpan := startLLMTaskProgress(opts, progressEvent) started := opts.now() - result, err := llm.RunStructuredWithSessionResume(ctx, opts.Adapter, resumeSessionID, llm.Request{ - Model: spec.model, - Effort: spec.effort, - Prompt: spec.prompt, - LogPath: spec.logPath, - }, decode) + request := spec.baseRequest + request.Model = spec.model + request.Effort = spec.effort + request.Prompt = spec.prompt + request.LogPath = spec.logPath + result, err := llm.RunStructuredWithSessionResume(ctx, opts.Adapter, resumeSessionID, request, decode) completed := opts.now() draft := sessionDraft{ rowID: opts.newSessionRowID(), @@ -2227,91 +2237,26 @@ func pruneRetention(ctx context.Context, layout statepaths.Layout, store datalif return nil } -func buildReviewerPrompt(ctx context.Context, opts Options, req Request, pr gitprovider.PR, parsed ParsedDiff, selected llm.SelectedAgent, agent agents.Agent, model string) (string, error) { - patchesByPath := map[string]FilePatch{} - for _, patch := range parsed.Patches { - patchesByPath[patch.Path] = patch - if patch.OldPath != "" { - patchesByPath[patch.OldPath] = patch - } - } - var files []fileContext - for _, path := range selected.Files { - patch, ok := patchesByPath[path] - if !ok { - return "", fmt.Errorf("pipeline: selected file %q missing patch", path) - } - fc := fileContext{Path: patch.Path, Diff: patch.Patch} - if agent.NeedsFullFileContent { - basePath := patch.OldPath - if basePath == "" { - basePath = patch.Path - } - baseBytes, err := fetchFileOptional(ctx, opts.Provider, req.PRRef, pr.Base.SHA, basePath) - if err != nil { - return "", err - } - if err := opts.checkPromptBudget("full-content", agent.ID, model, basePath, string(baseBytes)); err != nil { - return "", err - } - headBytes, err := fetchFileOptional(ctx, opts.Provider, req.PRRef, pr.Head.SHA, patch.Path) - if err != nil { - return "", err - } - if err := opts.checkPromptBudget("full-content", agent.ID, model, patch.Path, string(headBytes)); err != nil { - return "", err - } - fc.BaseContent = string(baseBytes) - fc.HeadContent = string(headBytes) - } - files = append(files, fc) +func buildReviewerPrompt(paths ArtifactPaths, pr gitprovider.PR, selected llm.SelectedAgent, agent agents.Agent) (string, []string, error) { + input, deps, err := reviewerPromptInputFromArtifacts(paths, pr, selected, agent) + if err != nil { + return "", nil, err } payload := map[string]any{ "task": "review files and return findings JSON only", - "output_contract": findingsOutputContract(agent.ID, patchPathsFromContexts(files)), - "agent": agentPromptFromAgent(agent), - "files": files, + "output_contract": findingsOutputContract(agent.ID, append([]string(nil), input.Assignment.Files...)), + "agent": reviewerAgentPromptFromAgent(agent), + "assignment": input.Assignment, + "dossier": input.Dossier, + "workbench": input.Workbench, + "pr": input.PR, "schema": "findings", } body, err := json.MarshalIndent(payload, "", " ") if err != nil { - return "", err - } - return string(body), nil -} - -type agentPrompt struct { - ID string `json:"id"` - Name string `json:"name"` - Category agents.Category `json:"category"` - Description string `json:"description,omitempty"` - ModelTier string `json:"model_tier,omitempty"` - ModelID string `json:"model_id,omitempty"` - Effort string `json:"effort,omitempty"` - FileGlobs []string `json:"file_globs,omitempty"` - AppliesWhen []string `json:"applies_when,omitempty"` - NeedsFullFileContent bool `json:"needs_full_file_content"` - Prompt string `json:"prompt,omitempty"` - Provenance string `json:"provenance"` - Overridden []string `json:"overridden,omitempty"` -} - -func agentPromptFromAgent(agent agents.Agent) agentPrompt { - return agentPrompt{ - ID: agent.ID, - Name: agent.Name, - Category: agent.Category, - Description: agent.Description, - ModelTier: agent.ModelTier, - ModelID: agent.ModelID, - Effort: agent.Effort, - FileGlobs: append([]string(nil), agent.FileGlobs...), - AppliesWhen: append([]string(nil), agent.AppliesWhen...), - NeedsFullFileContent: agent.NeedsFullFileContent, - Prompt: agent.Prompt, - Provenance: agent.Provenance.String(), - Overridden: append([]string(nil), agent.Overridden...), + return "", nil, err } + return string(body), deps, nil } type selectionAgentPrompt struct { @@ -2396,19 +2341,50 @@ func selectionAgentPromptsFromCatalog(catalog agents.Catalog) []selectionAgentPr return out } -type fileContext struct { - Path string `json:"path"` - Diff string `json:"diff"` - BaseContent string `json:"base_content,omitempty"` - HeadContent string `json:"head_content,omitempty"` +type reviewerAgentPrompt struct { + ID string `json:"id"` + Name string `json:"name"` + Category agents.Category `json:"category"` + Description string `json:"description,omitempty"` + Prompt string `json:"prompt,omitempty"` } -func fetchFileOptional(ctx context.Context, provider ReadProvider, ref gitprovider.PRRef, gitRef string, path string) ([]byte, error) { - data, err := provider.GetFileAtRef(ctx, ref, gitRef, path) - if errors.Is(err, gitprovider.ErrNotFound) { - return nil, nil +func reviewerAgentPromptFromAgent(agent agents.Agent) reviewerAgentPrompt { + return reviewerAgentPrompt{ + ID: agent.ID, + Name: agent.Name, + Category: agent.Category, + Description: agent.Description, + Prompt: agent.Prompt, } - return data, err +} + +type reviewerPromptDossier struct { + PRIntent string `json:"pr_intent"` + ChangeMap string `json:"change_map"` + RepoGuidance string `json:"repo_guidance"` + Discussion string `json:"discussion"` +} + +type reviewerPromptWorkbench struct { + CheckoutMode string `json:"checkout_mode"` + PR workbenchPRIdentity `json:"pr"` + Base workbenchBranchArtifact `json:"base"` + Head workbenchBranchArtifact `json:"head"` +} + +type reviewerPromptAssignment struct { + AgentID string `json:"agent_id"` + Rationale string `json:"rationale,omitempty"` + Files []string `json:"files"` + AllowedFiles []string `json:"allowed_files,omitempty"` +} + +type reviewerPromptInput struct { + PR promptPR `json:"pr"` + Dossier reviewerPromptDossier `json:"dossier"` + Workbench reviewerPromptWorkbench `json:"workbench"` + Assignment reviewerPromptAssignment `json:"assignment"` } const defaultSelectionTask = "select reviewer agents and thread actions from dossier/workbench context; return selection JSON only" @@ -2526,6 +2502,80 @@ func selectionPromptContentFromPath(path string) (string, error) { return string(data), nil } +func reviewerPromptInputFromArtifacts(paths ArtifactPaths, pr gitprovider.PR, selected llm.SelectedAgent, agent agents.Agent) (reviewerPromptInput, []string, error) { + prIntentPath, err := paths.DossierFinalPath("pr-intent.md") + if err != nil { + return reviewerPromptInput{}, nil, err + } + changeMapPath, err := paths.DossierFinalPath("change-map.md") + if err != nil { + return reviewerPromptInput{}, nil, err + } + repoGuidancePath, err := paths.DossierFinalPath("repo-guidance.md") + if err != nil { + return reviewerPromptInput{}, nil, err + } + discussionPath, err := paths.DossierFinalPath("discussion.md") + if err != nil { + return reviewerPromptInput{}, nil, err + } + prIntent, err := selectionPromptContentFromPath(prIntentPath) + if err != nil { + return reviewerPromptInput{}, nil, err + } + changeMap, err := selectionPromptContentFromPath(changeMapPath) + if err != nil { + return reviewerPromptInput{}, nil, err + } + repoGuidance, err := selectionPromptContentFromPath(repoGuidancePath) + if err != nil { + return reviewerPromptInput{}, nil, err + } + discussion, err := selectionPromptContentFromPath(discussionPath) + if err != nil { + return reviewerPromptInput{}, nil, err + } + indexBytes, err := os.ReadFile(paths.DossierIndexPath()) // #nosec G304 -- artifact path is pipeline-owned under the selected run/workbench root. + if err != nil { + return reviewerPromptInput{}, nil, fmt.Errorf("pipeline: read dossier artifact %s: %w", filepath.Base(paths.DossierIndexPath()), err) + } + metaPath := paths.WorkbenchMetadataPath() + metaBytes, err := os.ReadFile(metaPath) // #nosec G304 -- artifact path is pipeline-owned under the selected run/workbench root. + if err != nil { + return reviewerPromptInput{}, nil, fmt.Errorf("pipeline: read workbench metadata: %w", err) + } + var meta workbenchMetadataArtifact + if err := json.Unmarshal(metaBytes, &meta); err != nil { + return reviewerPromptInput{}, nil, fmt.Errorf("pipeline: decode workbench metadata: %w", err) + } + input := reviewerPromptInput{ + PR: promptPRFromPR(pr), + Dossier: reviewerPromptDossier{ + PRIntent: prIntent, + ChangeMap: changeMap, + RepoGuidance: repoGuidance, + Discussion: discussion, + }, + Workbench: reviewerPromptWorkbench{ + CheckoutMode: meta.CheckoutMode, + PR: meta.PR, + Base: meta.Base, + Head: meta.Head, + }, + Assignment: reviewerPromptAssignment{ + AgentID: agent.ID, + Rationale: selected.Rationale, + Files: append([]string(nil), selected.Files...), + AllowedFiles: append([]string(nil), selected.AllowedFiles...), + }, + } + deps := []string{ + "dossier_index=" + sha256Hex(indexBytes), + "workbench_metadata=" + sha256Hex(metaBytes), + } + return input, deps, nil +} + func selectionThreadPrompts(threads []gitprovider.InlineThread, summary dossierDiscussionSummaryArtifact) []selectionThreadPrompt { summaryByAnchor := make(map[string]dossierInlineThreadSummaryArtifact, len(summary.InlineThreads)) for _, thread := range summary.InlineThreads { @@ -2755,14 +2805,6 @@ func rollupOutputContract(findings []review.Finding) outputContract { } } -func patchPathsFromContexts(files []fileContext) []string { - paths := make([]string, 0, len(files)) - for _, file := range files { - paths = append(paths, file.Path) - } - return paths -} - func firstOrPlaceholder(values []string, placeholder string) string { if len(values) > 0 { return values[0] diff --git a/internal/pipeline/pipeline_test.go b/internal/pipeline/pipeline_test.go index 53b8ead..635b836 100644 --- a/internal/pipeline/pipeline_test.go +++ b/internal/pipeline/pipeline_test.go @@ -302,9 +302,8 @@ func TestDryRunWithPinnedReviewSHAsUsesCompareDiffAndPinnedFileRefs(t *testing.T if !strings.Contains(result.Artifacts.Dir, reviewHeadSHA) || !strings.Contains(result.Artifacts.Dir, reviewBaseSHA) { t.Fatalf("artifact dir = %s, want pinned head/base SHAs", result.Artifacts.Dir) } - if !containsFileCall(provider.fileCalls, fileKey{gitRef: reviewBaseSHA, path: "main.go"}) || - !containsFileCall(provider.fileCalls, fileKey{gitRef: reviewHeadSHA, path: "main.go"}) { - t.Fatalf("file calls = %#v, want pinned base/head refs", provider.fileCalls) + if len(provider.fileCalls) != 0 { + t.Fatalf("file calls = %#v, want no stuffed file reads in checkout-readonly reviewer mode", provider.fileCalls) } requests := adapter.Requests() if len(requests) < 1 { @@ -317,6 +316,14 @@ func TestDryRunWithPinnedReviewSHAsUsesCompareDiffAndPinnedFileRefs(t *testing.T if strings.Contains(selectionPrompt, provider.pr.Head.SHA) { t.Fatalf("selection prompt contains current PR SHAs: %s", selectionPrompt) } + if requests[1].CheckoutReadonly == nil { + t.Fatalf("reviewer request = %#v, want checkout-readonly access", requests[1]) + } + if requests[1].CheckoutReadonly.RootDir != result.Artifacts.WorkbenchRepoDir || + requests[1].CheckoutReadonly.ScratchDir != result.Artifacts.WorkbenchScratch || + requests[1].CheckoutReadonly.MaxToolOutputBytes != defaultCheckoutReadonlyToolOutputBytes { + t.Fatalf("reviewer checkout request = %#v, want workbench repo/scratch with default cap", requests[1].CheckoutReadonly) + } if provider.threadCalls != 0 { t.Fatalf("thread calls = %d, want no live thread reads for pinned review", provider.threadCalls) } @@ -1970,7 +1977,7 @@ func TestPrepareCheckoutReadonlyAccessRejectsSymlinkTargets(t *testing.T) { func TestBuildCheckoutReadonlyRequestUnsupportedAdapterFailsWithoutFallback(t *testing.T) { artifacts := ArtifactPathsFromDir(t.TempDir()) - req, err := buildCheckoutReadonlyRequest(Options{Adapter: &promptAwareAdapter{}}, artifacts, "harness:smoke", nil, "gpt-5.5", "medium", "smoke", filepath.Join(t.TempDir(), "smoke.jsonl")) + req, err := buildCheckoutReadonlyRequest(Options{Adapter: &llm.FakeAdapter{NameValue: "fake-unsupported", SupportsCheckoutReadonlySet: true}}, artifacts, "harness:smoke", nil, "gpt-5.5", "medium", "smoke", filepath.Join(t.TempDir(), "smoke.jsonl")) if err == nil || !strings.Contains(err.Error(), "checkout-readonly capability") { t.Fatalf("buildCheckoutReadonlyRequest error = %v, want missing checkout-readonly capability", err) } @@ -3814,8 +3821,8 @@ func TestDryRunMultiAgentSessionsMapFindingsToReviewerSessions(t *testing.T) { } if strings.Contains(request.Prompt, `"schema": "findings"`) { reviewerPrompts++ - if !strings.Contains(request.Prompt, `"agent"`) || !strings.Contains(request.Prompt, `"files"`) { - t.Fatalf("reviewer prompt missing agent/files context: %s", request.Prompt) + if !strings.Contains(request.Prompt, `"agent"`) || !strings.Contains(request.Prompt, `"assignment"`) || !strings.Contains(request.Prompt, `"dossier"`) || !strings.Contains(request.Prompt, `"workbench"`) { + t.Fatalf("reviewer prompt missing checkout-native context: %s", request.Prompt) } if !strings.Contains(request.Prompt, `"file_path"`) || !strings.Contains(request.Prompt, `"anchor"`) || @@ -3828,6 +3835,11 @@ func TestDryRunMultiAgentSessionsMapFindingsToReviewerSessions(t *testing.T) { if !strings.Contains(request.Prompt, "Review alpha files.") && !strings.Contains(request.Prompt, "Review beta files.") { t.Fatalf("reviewer prompt missing prompt.md body text: %s", request.Prompt) } + for _, forbidden := range []string{`"diff"`, `"base_content"`, `"head_content"`, `"needs_full_file_content"`, `"provenance"`, `"overridden"`, `"model_tier"`, `"model_id"`, `"effort"`} { + if strings.Contains(request.Prompt, forbidden) { + t.Fatalf("reviewer prompt leaked legacy or stuffed field %q: %s", forbidden, request.Prompt) + } + } } if strings.Contains(request.Prompt, `"schema": "rollup"`) && (!strings.Contains(request.Prompt, `"review_event"`) || @@ -4412,6 +4424,137 @@ func TestFindingsOutputContractScopesAnchorToFindingItems(t *testing.T) { } } +func TestRunReviewerRejectsStaleWorkbenchMetadata(t *testing.T) { + ctx := context.Background() + store := openPipelineStore(t) + defer closeStore(t, store) + layout := statepaths.NewLayout(t.TempDir(), t.TempDir()) + run := allocatePipelineRun(t, store, layout, "run-reviewer-stale-workbench", ledger.PostModeDryRun, fixedNow()) + provider, req := dryRunHarness(t) + adapter := &llm.FakeAdapter{NameValue: "fake-llm"} + adapter.Queue(fakeLLMResult("dossier-summary-session", discussionSummaryJSON(nil, nil), 8, 2)) + adapter.Queue(fakeLLMResult("selection-session", selectionJSON("harness:reviewer", "main.go"), 10, 2)) + adapter.Queue(fakeLLMResult("reviewer-session", findingsJSON("harness:reviewer", "main.go", "major", 2, "Fix this"), 20, 4)) + opts := Options{ + Provider: provider, + Adapter: adapter, + Store: store, + Layout: layout, + Now: fixedNow, + NewSessionRowID: sequence("session"), + NewFindingID: findingSequence("finding"), + } + configureWorkbenchFixtureForTest(ctx, &opts, req.PRRef) + prepared, err := prepareSelectionContext(ctx, opts, selectionSetupRequest{ + PRRef: req.PRRef, + Profile: req.Profile, + AgentDirs: req.AgentDirs, + ReviewBaseSHA: req.ReviewBaseSHA, + ReviewHeadSHA: req.ReviewHeadSHA, + ResolveArtifacts: func(gitprovider.PR) (ArtifactPaths, error) { + return ArtifactPathsFromDir(run.ArtifactPath), nil + }, + }) + if err != nil { + t.Fatalf("prepareSelectionContext: %v", err) + } + if err := prepareWorkbenchArtifacts(ctx, opts, workbenchPreparationRequest{ + PRRef: req.PRRef, + ReviewPR: prepared.reviewPR, + ChangedFiles: prepared.changedFiles, + Artifacts: prepared.artifacts, + }); err != nil { + t.Fatalf("prepareWorkbenchArtifacts: %v", err) + } + defer func() { + if err := unlockWorkbenchRepo(prepared.artifacts.WorkbenchRepoDir); err != nil { + t.Fatalf("unlockWorkbenchRepo: %v", err) + } + }() + if err := prepareDossierArtifacts(ctx, opts, dossierPreparationRequest{ + RunID: run.RunID, + Profile: req.Profile, + Artifacts: prepared.artifacts, + }); err != nil { + t.Fatalf("prepareDossierArtifacts: %v", err) + } + selection, _, _, err := runSelectionPhase(ctx, opts, selectionPhaseRequest{ + RunID: run.RunID, + Profile: req.Profile, + ReviewPR: prepared.reviewPR, + Catalog: prepared.catalog, + ParsedDiff: prepared.parsed, + Threads: prepared.threads, + Artifacts: prepared.artifacts, + MaxAgents: 1, + }) + if err != nil { + t.Fatalf("runSelectionPhase first: %v", err) + } + agent, ok := prepared.catalog.Find(selection.SelectedAgents[0].AgentID) + if !ok { + t.Fatalf("selected agent %q missing from catalog", selection.SelectedAgents[0].AgentID) + } + if _, _, _, _, err := runReviewer(ctx, opts, req, run.RunID, prepared.reviewPR, prepared.parsed, prepared.artifacts, selection.SelectedAgents[0], agent, []string{orchestratorSelectionStage}); err != nil { + t.Fatalf("runReviewer first: %v", err) + } + metaPath := prepared.artifacts.WorkbenchMetadataPath() + var meta workbenchMetadataArtifact + if err := readJSONFile(metaPath, &meta); err != nil { + t.Fatalf("read workbench metadata: %v", err) + } + meta.FingerprintInputs.ChangedFiles = append(meta.FingerprintInputs.ChangedFiles, "extra.go") + if err := writeJSONFile(metaPath, meta); err != nil { + t.Fatalf("write workbench metadata: %v", err) + } + secondAdapter := &llm.FakeAdapter{NameValue: "fake-llm"} + secondOpts := opts + secondOpts.Adapter = secondAdapter + runtimeConfig, err := resolveReviewerRuntimeConfig(req, agent) + if err != nil { + t.Fatalf("resolveReviewerRuntimeConfig: %v", err) + } + prompt, promptDeps, err := buildReviewerPrompt(prepared.artifacts, prepared.reviewPR, selection.SelectedAgents[0], agent) + if err != nil { + t.Fatalf("buildReviewerPrompt: %v", err) + } + logPath, err := prepared.artifacts.AgentLog(agent.ID) + if err != nil { + t.Fatalf("AgentLog: %v", err) + } + agentID := agent.ID + fingerprintDeps := append([]string{orchestratorSelectionStage}, promptDeps...) + _, _, _, ok, err = loadStructuredTask[llm.Findings](ctx, secondOpts, llmTaskSpec{ + runID: run.RunID, + taskID: reviewerTaskID(agent.ID), + phase: "reviewer", + dependencyTaskIDs: []string{orchestratorSelectionStage}, + inputFingerprint: llmTaskFingerprint(secondOpts.Adapter.Name(), reviewerTaskID(agent.ID), "reviewer", runtimeConfig.model, runtimeConfig.effort, prompt, fingerprintDeps), + artifacts: prepared.artifacts, + role: ledger.SessionRoleReviewer, + agentID: &agentID, + model: runtimeConfig.model, + effort: runtimeConfig.effort, + logPath: logPath, + prompt: prompt, + }, func(data []byte) (llm.Findings, error) { + return llm.DecodeFindings(data, llm.FindingsOptions{ + KnownAgents: map[string]bool{agent.ID: true}, + ChangedFiles: changedFiles(prepared.parsed.Patches), + NewFindingID: findingSequence("reload"), + }) + }) + if !ok { + t.Fatal("loadStructuredTask ok = false, want persisted reviewer metadata to be loaded and rejected") + } + if err == nil || !strings.Contains(err.Error(), "input fingerprint changed") { + t.Fatalf("loadStructuredTask stale workbench error = %v, want input fingerprint changed", err) + } + if len(secondAdapter.Requests()) != 0 || len(secondAdapter.Resumes()) != 0 { + t.Fatalf("adapter invoked despite stale reviewer metadata: starts=%#v resumes=%#v", secondAdapter.Requests(), secondAdapter.Resumes()) + } +} + func TestRollupPromptPreservesLocationForDedupeWithoutRawAnchors(t *testing.T) { prompt, err := buildRollupPrompt(gitprovider.PR{Body: "Rollup prompt body should stay out of prompt payloads."}, []review.Finding{ { @@ -4489,23 +4632,64 @@ func TestDryRunContextBudgetFailures(t *testing.T) { want: "context budget exceeded for selection model bench-model", runID: "run-budget-selection-override", }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + store := openPipelineStore(t) + defer closeStore(t, store) + provider, req := dryRunHarness(t) + adapter := &llm.FakeAdapter{NameValue: "fake-llm"} + if tt.mutate != nil { + tt.mutate(t, provider, &req, adapter) + } + if tt.queue != nil { + tt.queue(adapter) + } + _, err := dryRunForTest(ctx, Options{ + Provider: provider, + Adapter: adapter, + Store: store, + Layout: statepaths.NewLayout(t.TempDir(), t.TempDir()), + Now: fixedNow, + NewRunID: func() string { return tt.runID }, + NewSessionRowID: sequence("session"), + NewFindingID: findingSequence("finding"), + NewActionID: actionSequence(), + Budget: ContextBudget{MaxPromptBytes: tt.budget}, + MaxConcurrency: 1, + }, req) + if err == nil { + t.Fatal("DryRun error = nil, want budget failure") + } + if !strings.Contains(err.Error(), tt.want) { + t.Fatalf("DryRun error = %v, want substring %q", err, tt.want) + } + }) + } +} + +func TestDryRunReviewerCheckoutReadonlyAvoidsContextStuffingBudgetFailures(t *testing.T) { + tests := []struct { + name string + budget int + mutate func(t *testing.T, provider *readOnlyProvider, req *Request) + runID string + }{ { - name: "reviewer diff", + name: "large reviewer diff", budget: 9500, - mutate: func(t *testing.T, provider *readOnlyProvider, _ *Request, _ *llm.FakeAdapter) { + mutate: func(t *testing.T, provider *readOnlyProvider, _ *Request) { t.Helper() provider.diff.Raw = largeDiff("main.go", strings.Repeat("+var x = true\n", 1600)) }, - queue: func(adapter *llm.FakeAdapter) { - adapter.Queue(fakeLLMResult("selection-session", selectionJSON("harness:reviewer", "main.go"), 1, 1)) - }, - want: "context budget exceeded for reviewer agent harness:reviewer", - runID: "run-budget-reviewer", + runID: "run-budget-reviewer-checkout", }, { - name: "full content default model", + name: "legacy full-content agent no longer stuffs base head", budget: 9500, - mutate: func(t *testing.T, provider *readOnlyProvider, req *Request, _ *llm.FakeAdapter) { + mutate: func(t *testing.T, provider *readOnlyProvider, req *Request) { t.Helper() dir := t.TempDir() writeAgentFullContent(t, dir, "harness", "reviewer") @@ -4514,30 +4698,7 @@ func TestDryRunContextBudgetFailures(t *testing.T) { provider.files[fileKey{gitRef: provider.pr.Base.SHA, path: "main.go"}] = []byte(strings.Repeat("base\n", 3000)) provider.files[fileKey{gitRef: provider.pr.Head.SHA, path: "main.go"}] = []byte("package main\n") }, - queue: func(adapter *llm.FakeAdapter) { - adapter.Queue(fakeLLMResult("selection-session", selectionJSON("harness:reviewer", "main.go"), 1, 1)) - }, - want: "context budget exceeded for full-content agent harness:reviewer file main.go model claude-sonnet-4-6", - runID: "run-budget-full-content-default", - }, - { - name: "full content override model", - budget: 10000, - mutate: func(t *testing.T, provider *readOnlyProvider, req *Request, _ *llm.FakeAdapter) { - t.Helper() - dir := t.TempDir() - writeAgentFullContent(t, dir, "harness", "reviewer") - trustCurrentTempFixtures(t) - req.Profile.AgentSources = []string{dir} - req.ReviewerModelOverride = "bench-model" - provider.files[fileKey{gitRef: provider.pr.Base.SHA, path: "main.go"}] = []byte(strings.Repeat("base\n", 3000)) - provider.files[fileKey{gitRef: provider.pr.Head.SHA, path: "main.go"}] = []byte("package main\n") - }, - queue: func(adapter *llm.FakeAdapter) { - adapter.Queue(fakeLLMResult("selection-session", selectionJSON("harness:reviewer", "main.go"), 1, 1)) - }, - want: "context budget exceeded for full-content agent harness:reviewer file main.go model bench-model", - runID: "run-budget-full-content-override", + runID: "run-budget-reviewer-legacy-agent", }, } @@ -4548,13 +4709,12 @@ func TestDryRunContextBudgetFailures(t *testing.T) { defer closeStore(t, store) provider, req := dryRunHarness(t) adapter := &llm.FakeAdapter{NameValue: "fake-llm"} - if tt.mutate != nil { - tt.mutate(t, provider, &req, adapter) - } - if tt.queue != nil { - tt.queue(adapter) - } - _, err := dryRunForTest(ctx, Options{ + tt.mutate(t, provider, &req) + adapter.Queue(fakeLLMResult("selection-session", selectionJSON("harness:reviewer", "main.go"), 1, 1)) + adapter.Queue(fakeLLMResult("reviewer-session", findingsJSON("harness:reviewer", "main.go", "major", 2, "Fix this"), 1, 1)) + adapter.Queue(fakeLLMResult("rollup-session", rollupJSON("comment", []string{"finding-1"}), 1, 1)) + + result, err := dryRunForTest(ctx, Options{ Provider: provider, Adapter: adapter, Store: store, @@ -4567,11 +4727,29 @@ func TestDryRunContextBudgetFailures(t *testing.T) { Budget: ContextBudget{MaxPromptBytes: tt.budget}, MaxConcurrency: 1, }, req) - if err == nil { - t.Fatal("DryRun error = nil, want budget failure") + if err != nil { + t.Fatalf("DryRun: %v", err) } - if !strings.Contains(err.Error(), tt.want) { - t.Fatalf("DryRun error = %v, want substring %q", err, tt.want) + requests := adapter.Requests() + if len(requests) < 2 { + t.Fatalf("adapter requests = %#v, want selection and reviewer calls", requests) + } + reviewerPrompt := requests[1].Prompt + for _, forbidden := range []string{`"diff"`, `"base_content"`, `"head_content"`} { + if strings.Contains(reviewerPrompt, forbidden) { + t.Fatalf("reviewer prompt leaked stuffed code field %q: %s", forbidden, reviewerPrompt) + } + } + if requests[1].CheckoutReadonly == nil { + t.Fatalf("reviewer request = %#v, want checkout-readonly access", requests[1]) + } + if requests[1].CheckoutReadonly.RootDir != result.Artifacts.WorkbenchRepoDir || + requests[1].CheckoutReadonly.ScratchDir != result.Artifacts.WorkbenchScratch || + requests[1].CheckoutReadonly.MaxToolOutputBytes != defaultCheckoutReadonlyToolOutputBytes { + t.Fatalf("reviewer checkout request = %#v, want workbench repo/scratch with default cap", requests[1].CheckoutReadonly) + } + if len(result.Findings) != 1 { + t.Fatalf("findings len = %d, want reviewer success under bounded prompt budget", len(result.Findings)) } }) } @@ -4662,6 +4840,10 @@ func (a *promptAwareAdapter) SupportsResume() bool { return false } +func (a *promptAwareAdapter) SupportsCheckoutReadonly() bool { + return true +} + func (a *promptAwareAdapter) SupportsCacheAccounting() bool { return false } @@ -4741,6 +4923,10 @@ func (a *reviewerIsolationAdapter) SupportsResume() bool { return false } +func (a *reviewerIsolationAdapter) SupportsCheckoutReadonly() bool { + return true +} + func (a *reviewerIsolationAdapter) SupportsCacheAccounting() bool { return false }