diff --git a/internal/gitprovider/github/rest.go b/internal/gitprovider/github/rest.go index 10e6486..760da81 100644 --- a/internal/gitprovider/github/rest.go +++ b/internal/gitprovider/github/rest.go @@ -31,6 +31,7 @@ type branchResponse struct { type prResponse struct { Title string `json:"title"` + Body string `json:"body"` HTMLURL string `json:"html_url"` State string `json:"state"` Merged bool `json:"merged"` @@ -103,6 +104,7 @@ func (c *Client) GetPR(ctx context.Context, ref gitprovider.PRRef) (gitprovider. return gitprovider.PR{ Ref: ref, Title: payload.Title, + Body: payload.Body, URL: payload.HTMLURL, State: state, Author: identityFromUser(payload.User), diff --git a/internal/gitprovider/types.go b/internal/gitprovider/types.go index 686a400..5f2b6db 100644 --- a/internal/gitprovider/types.go +++ b/internal/gitprovider/types.go @@ -87,6 +87,7 @@ func (s PRState) Valid() bool { type PR struct { Ref PRRef Title string + Body string URL string State PRState Author Identity diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index 1ac5458..dade84c 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -8,6 +8,7 @@ import ( "encoding/json" "errors" "fmt" + "io/fs" "io" "os" "path/filepath" @@ -52,6 +53,8 @@ type ReadProvider interface { GetFileAtRef(context.Context, gitprovider.PRRef, string, string) ([]byte, error) ListTreeAtRef(context.Context, gitprovider.PRRef, string, string) ([]gitprovider.TreeEntry, error) ListInlineThreads(context.Context, gitprovider.PRRef) ([]gitprovider.InlineThread, error) + ListReviews(context.Context, gitprovider.PRRef) ([]gitprovider.Review, error) + ListIssueComments(context.Context, gitprovider.PRRef) ([]gitprovider.IssueComment, error) Capabilities() gitprovider.ProviderCaps } @@ -193,6 +196,7 @@ type ArtifactPaths struct { AgentSourcesJSON string `json:"agent_sources_json"` AgentLogsDir string `json:"agent_logs_dir"` LLMTasksDir string `json:"llm_tasks_dir"` + DossierDir string `json:"dossier_dir"` } // SlicePatch returns the artifact path for an agent/file diff slice. @@ -253,6 +257,40 @@ func (p ArtifactPaths) LLMTaskRawAttempt(taskID, attempt string) (string, error) return filepath.Join(dir, statepaths.Encode(attempt)+".json"), nil } +// DossierRawPath returns a raw dossier artifact path by file name. +func (p ArtifactPaths) DossierRawPath(name string) (string, error) { + return dossierChildPath(filepath.Join(p.DossierDir, "raw"), name) +} + +// DossierSummaryPath returns a summary dossier artifact path by file name. +func (p ArtifactPaths) DossierSummaryPath(name string) (string, error) { + return dossierChildPath(filepath.Join(p.DossierDir, "summary"), name) +} + +// DossierFinalPath returns a reviewer-facing dossier artifact path by file name. +func (p ArtifactPaths) DossierFinalPath(name string) (string, error) { + return dossierChildPath(filepath.Join(p.DossierDir, "final"), name) +} + +// DossierIndexPath returns the dossier index artifact path. +func (p ArtifactPaths) DossierIndexPath() string { + return filepath.Join(p.DossierDir, "index.json") +} + +func dossierChildPath(dir, name string) (string, error) { + name = strings.TrimSpace(name) + if name == "" { + return "", fmt.Errorf("pipeline: dossier artifact name is required") + } + if name == "." || name == ".." { + return "", fmt.Errorf("pipeline: dossier artifact name must be a file name") + } + if strings.Contains(name, "/") || strings.Contains(name, string(filepath.Separator)) { + return "", fmt.Errorf("pipeline: dossier artifact name must be a file name") + } + return filepath.Join(dir, name), nil +} + const llmTaskSchemaVersion = 1 type llmTaskStatus string @@ -436,6 +474,8 @@ type preparedSelectionContext struct { parsed ParsedDiff changedFiles []string threads []gitprovider.InlineThread + reviews []gitprovider.Review + issueComments []gitprovider.IssueComment catalog agents.Catalog effectiveCaps reviewplan.ProviderCaps agentDefsChanged bool @@ -874,6 +914,9 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet if err := os.MkdirAll(artifacts.LLMTasksDir, 0o700); err != nil { return preparedSelectionContext{}, fmt.Errorf("pipeline: create LLM task dir: %w", err) } + if err := os.MkdirAll(artifacts.DossierDir, 0o700); err != nil { + return preparedSelectionContext{}, fmt.Errorf("pipeline: create dossier dir: %w", err) + } quota, quotaSupported, err := opts.Adapter.Quota(ctx) if err != nil { @@ -888,11 +931,21 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet return preparedSelectionContext{}, err } var threads []gitprovider.InlineThread + var reviews []gitprovider.Review + var issueComments []gitprovider.IssueComment if !reviewCtx.pinnedReview { threads, err = opts.Provider.ListInlineThreads(ctx, req.PRRef) if err != nil { return preparedSelectionContext{}, err } + reviews, err = opts.Provider.ListReviews(ctx, req.PRRef) + if err != nil { + return preparedSelectionContext{}, err + } + issueComments, err = opts.Provider.ListIssueComments(ctx, req.PRRef) + if err != nil { + return preparedSelectionContext{}, err + } } catalog, err := agents.Load(ctx, agents.LoadOptions{ ProfileDirs: append([]string(nil), req.Profile.AgentSources...), @@ -904,7 +957,7 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet return preparedSelectionContext{}, err } - return preparedSelectionContext{ + out := preparedSelectionContext{ pr: pr, reviewPR: reviewPR, prKey: prKey, @@ -916,6 +969,8 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet parsed: parsed, changedFiles: patchPaths(parsed.Patches), threads: threads, + reviews: reviews, + issueComments: issueComments, catalog: catalog, effectiveCaps: effectiveCaps(opts.Provider.Capabilities(), req.NoResolveThreads), agentDefsChanged: agentDefinitionsChanged(parsed.Patches), @@ -923,7 +978,23 @@ func prepareSelectionContext(ctx context.Context, opts Options, req selectionSet currentHeadSHA: reviewCtx.currentHeadSHA, reviewBaseSHA: reviewPR.Base.SHA, reviewHeadSHA: reviewPR.Head.SHA, - }, nil + } + if err := writeDossierArtifacts(artifacts, dossierInputs{ + CurrentPR: pr, + ReviewPR: reviewPR, + PinnedReview: reviewCtx.pinnedReview, + ChangedFiles: parsed.Patches, + Threads: threads, + Reviews: reviews, + IssueComments: issueComments, + Catalog: catalog, + CurrentBaseSHA: reviewCtx.currentBaseSHA, + CurrentHeadSHA: reviewCtx.currentHeadSHA, + DiscussionOmittedNote: "Current PR discussion omitted because this review is pinned to explicit base/head SHAs.", + }); err != nil { + return preparedSelectionContext{}, err + } + return out, nil } func resolveReviewPRContext(ctx context.Context, provider ReadProvider, ref gitprovider.PRRef, reviewBaseSHA, reviewHeadSHA string) (reviewPRContext, error) { @@ -2155,6 +2226,28 @@ type selectionAgentPrompt struct { NeedsFullFileContent bool `json:"needs_full_file_content"` } +type promptPR struct { + Ref gitprovider.PRRef `json:"ref"` + Title string `json:"title"` + URL string `json:"url"` + State gitprovider.PRState `json:"state"` + Author gitprovider.Identity `json:"author"` + Head gitprovider.PRBranchRef `json:"head"` + Base gitprovider.PRBranchRef `json:"base"` +} + +func promptPRFromPR(pr gitprovider.PR) promptPR { + return promptPR{ + Ref: pr.Ref, + Title: pr.Title, + URL: pr.URL, + State: pr.State, + Author: pr.Author, + Head: pr.Head, + Base: pr.Base, + } +} + func selectionAgentPromptFromAgent(agent agents.Agent) selectionAgentPrompt { return selectionAgentPrompt{ ID: agent.ID, @@ -2197,7 +2290,7 @@ func buildSelectionPrompt(pr gitprovider.PR, catalog agents.Catalog, patches []F "output_contract": selectionOutputContract(catalog.Agents, patches, threads, maxAgents), "schema": "selection", "max_selected_agents": maxAgents, - "pr": pr, + "pr": promptPRFromPR(pr), "agents": selectionAgentPromptsFromCatalog(catalog), "changed_files": patchPaths(patches), "threads": threads, @@ -2217,7 +2310,7 @@ func buildRollupPrompt(pr gitprovider.PR, findings []review.Finding, reviewerFai "task": "dedupe findings and return rollup JSON only", "output_contract": rollupOutputContract(findings), "schema": "rollup", - "pr": pr, + "pr": promptPRFromPR(pr), "findings": rollupFindingsPrompt(findings), "reviewer_failures": reviewerFailures, } @@ -2491,6 +2584,601 @@ func writeArtifacts(paths ArtifactPaths, rawDiff string, patches []FilePatch, ca return nil } +type dossierInputs struct { + CurrentPR gitprovider.PR + ReviewPR gitprovider.PR + PinnedReview bool + ChangedFiles []FilePatch + Threads []gitprovider.InlineThread + Reviews []gitprovider.Review + IssueComments []gitprovider.IssueComment + Catalog agents.Catalog + CurrentBaseSHA string + CurrentHeadSHA string + DiscussionOmittedNote string +} + +type dossierChangedFileArtifact struct { + Path string `json:"path"` + OldPath string `json:"old_path,omitempty"` + Status string `json:"status"` + Binary bool `json:"binary,omitempty"` + Deleted bool `json:"deleted,omitempty"` + Additions int `json:"additions,omitempty"` + Deletions int `json:"deletions,omitempty"` + HunkCount int `json:"hunk_count,omitempty"` +} + +type dossierTopLevelCommentArtifact struct { + Kind string `json:"kind"` + URL string `json:"url,omitempty"` + Author string `json:"author,omitempty"` + Body string `json:"body"` + Event string `json:"event,omitempty"` + CreatedAt time.Time `json:"created_at,omitempty"` + UpdatedAt time.Time `json:"updated_at,omitempty"` +} + +type dossierThreadCommentArtifact struct { + URL string `json:"url,omitempty"` + Author string `json:"author,omitempty"` + Body string `json:"body"` + CommitSHA string `json:"commit_sha,omitempty"` + CreatedAt time.Time `json:"created_at,omitempty"` + UpdatedAt time.Time `json:"updated_at,omitempty"` +} + +type dossierInlineThreadArtifact struct { + ID string `json:"id"` + Path string `json:"path,omitempty"` + Side string `json:"side,omitempty"` + Line int `json:"line,omitempty"` + AnchorKind string `json:"anchor_kind,omitempty"` + Resolved bool `json:"resolved"` + CommitSHA string `json:"commit_sha,omitempty"` + Comments []dossierThreadCommentArtifact `json:"comments,omitempty"` +} + +type dossierRepoContextArtifact struct { + RepoInfo *agents.RepoInfo `json:"repo_info,omitempty"` + Sources []agents.SourceInfo `json:"sources,omitempty"` + ExplicitReviewGuidance bool `json:"explicit_review_guidance"` + ExplicitReviewGuidanceSource string `json:"explicit_review_guidance_source,omitempty"` +} + +type dossierPRContextArtifact struct { + Title string `json:"title"` + URL string `json:"url"` + Author string `json:"author,omitempty"` + BaseRef string `json:"base_ref,omitempty"` + BaseSHA string `json:"base_sha,omitempty"` + HeadRef string `json:"head_ref,omitempty"` + HeadSHA string `json:"head_sha,omitempty"` + ReviewBaseSHA string `json:"review_base_sha,omitempty"` + ReviewHeadSHA string `json:"review_head_sha,omitempty"` + PinnedReview bool `json:"pinned_review"` + Body string `json:"body,omitempty"` +} + +type dossierDiscussionArtifact struct { + PinnedReview bool `json:"pinned_review"` + DiscussionOmittedNote string `json:"discussion_omitted_note,omitempty"` + TopLevelComments []dossierTopLevelCommentArtifact `json:"top_level_comments,omitempty"` + InlineThreads []dossierInlineThreadArtifact `json:"inline_threads,omitempty"` +} + +type dossierIndexArtifact struct { + HashAlgorithm string `json:"hash_algorithm"` + Files []dossierIndexFileArtifact `json:"files"` +} + +type dossierIndexFileArtifact struct { + Path string `json:"path"` + SHA256 string `json:"sha256"` +} + +const ( + dossierFinalMaxTopLevelComments = 20 + dossierFinalMaxInlineThreads = 20 + dossierFinalMaxThreadComments = 5 + dossierFinalExcerptRunes = 240 +) + +func writeDossierArtifacts(paths ArtifactPaths, in dossierInputs) error { + rawDir := filepath.Join(paths.DossierDir, "raw") + summaryDir := filepath.Join(paths.DossierDir, "summary") + finalDir := filepath.Join(paths.DossierDir, "final") + for _, dir := range []string{paths.DossierDir, rawDir, summaryDir, finalDir} { + if err := os.MkdirAll(dir, 0o700); err != nil { + return fmt.Errorf("pipeline: create dossier dir: %w", err) + } + } + + prContext := dossierPRContextArtifact{ + Title: in.CurrentPR.Title, + URL: in.CurrentPR.URL, + Author: in.CurrentPR.Author.Login, + BaseRef: in.CurrentPR.Base.Ref, + BaseSHA: in.CurrentBaseSHA, + HeadRef: in.CurrentPR.Head.Ref, + HeadSHA: in.CurrentHeadSHA, + ReviewBaseSHA: in.ReviewPR.Base.SHA, + ReviewHeadSHA: in.ReviewPR.Head.SHA, + PinnedReview: in.PinnedReview, + Body: strings.TrimSpace(in.CurrentPR.Body), + } + + changedFiles := dossierChangedFiles(in.ChangedFiles) + topLevelComments := dossierTopLevelComments(in.IssueComments, in.Reviews) + inlineThreads := dossierInlineThreads(in.Threads) + repoContext := dossierRepoContextArtifact{ + RepoInfo: in.Catalog.Repo, + Sources: append([]agents.SourceInfo(nil), in.Catalog.Sources...), + ExplicitReviewGuidance: false, + } + discussion := dossierDiscussionArtifact{ + PinnedReview: in.PinnedReview, + DiscussionOmittedNote: strings.TrimSpace(in.DiscussionOmittedNote), + } + if !in.PinnedReview { + discussion.TopLevelComments = topLevelComments + discussion.InlineThreads = inlineThreads + } + + rawFiles := map[string]any{ + "pr-context.json": prContext, + "changed-files.json": changedFiles, + "top-level-comments.json": topLevelComments, + "inline-threads.json": inlineThreads, + "repo-context.json": repoContext, + "discussion.json": discussion, + } + for name, payload := range rawFiles { + path, err := paths.DossierRawPath(name) + if err != nil { + return err + } + if err := writeJSONFile(path, payload); err != nil { + return err + } + } + + finalArtifacts := map[string]string{ + "pr-intent.md": renderDossierPRIntent(prContext), + "change-map.md": renderDossierChangeMap(changedFiles), + "repo-guidance.md": renderDossierRepoGuidance(repoContext), + "discussion.md": renderDossierDiscussion(discussion), + } + for name, body := range finalArtifacts { + path, err := paths.DossierFinalPath(name) + if err != nil { + return err + } + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + return fmt.Errorf("pipeline: write dossier artifact %s: %w", name, err) + } + } + + index, err := buildDossierIndex(paths.DossierDir) + if err != nil { + return err + } + return writeJSONFile(paths.DossierIndexPath(), index) +} + +func dossierChangedFiles(patches []FilePatch) []dossierChangedFileArtifact { + out := make([]dossierChangedFileArtifact, 0, len(patches)) + for _, patch := range patches { + additions, deletions := diffStats(patch.Patch) + out = append(out, dossierChangedFileArtifact{ + Path: patch.Path, + OldPath: oldPathIfDifferent(patch.OldPath, patch.Path), + Status: filePatchStatus(patch), + Binary: patch.Binary, + Deleted: patch.Deleted, + Additions: additions, + Deletions: deletions, + HunkCount: len(patch.Hunks), + }) + } + return out +} + +func dossierTopLevelComments(issueComments []gitprovider.IssueComment, reviews []gitprovider.Review) []dossierTopLevelCommentArtifact { + out := make([]dossierTopLevelCommentArtifact, 0, len(issueComments)+len(reviews)) + for _, comment := range issueComments { + body := strings.TrimSpace(comment.Body) + if body == "" { + continue + } + out = append(out, dossierTopLevelCommentArtifact{ + Kind: "issue_comment", + URL: comment.URL, + Author: comment.Author.Login, + Body: body, + CreatedAt: comment.CreatedAt, + UpdatedAt: comment.UpdatedAt, + }) + } + for _, reviewRecord := range reviews { + body := strings.TrimSpace(reviewRecord.Body) + if body == "" { + continue + } + out = append(out, dossierTopLevelCommentArtifact{ + Kind: "review", + URL: reviewRecord.URL, + Author: reviewRecord.Author.Login, + Body: body, + Event: string(reviewRecord.Event), + CreatedAt: reviewRecord.SubmittedAt, + UpdatedAt: reviewRecord.SubmittedAt, + }) + } + sort.SliceStable(out, func(i, j int) bool { + if out[i].CreatedAt.Equal(out[j].CreatedAt) { + return out[i].URL < out[j].URL + } + return out[i].CreatedAt.Before(out[j].CreatedAt) + }) + return out +} + +func dossierInlineThreads(threads []gitprovider.InlineThread) []dossierInlineThreadArtifact { + out := make([]dossierInlineThreadArtifact, 0, len(threads)) + for _, thread := range threads { + comments := make([]dossierThreadCommentArtifact, 0, len(thread.Comments)) + for _, comment := range thread.Comments { + comments = append(comments, dossierThreadCommentArtifact{ + URL: comment.URL, + Author: comment.Author.Login, + Body: strings.TrimSpace(comment.Body), + CommitSHA: comment.CommitSHA, + CreatedAt: comment.CreatedAt, + UpdatedAt: comment.UpdatedAt, + }) + } + sort.SliceStable(comments, func(i, j int) bool { + if comments[i].CreatedAt.Equal(comments[j].CreatedAt) { + if comments[i].URL == comments[j].URL { + return comments[i].Body < comments[j].Body + } + return comments[i].URL < comments[j].URL + } + return comments[i].CreatedAt.Before(comments[j].CreatedAt) + }) + out = append(out, dossierInlineThreadArtifact{ + ID: string(thread.ID), + Path: thread.Path, + Side: string(thread.Side), + Line: thread.Line, + AnchorKind: string(thread.SubjectType), + Resolved: thread.Resolved, + CommitSHA: thread.CommitSHA, + Comments: comments, + }) + } + sort.SliceStable(out, func(i, j int) bool { + if out[i].Path == out[j].Path { + if out[i].Line == out[j].Line { + return out[i].ID < out[j].ID + } + return out[i].Line < out[j].Line + } + return out[i].Path < out[j].Path + }) + return out +} + +func renderDossierPRIntent(pr dossierPRContextArtifact) string { + var out strings.Builder + out.WriteString("# PR Intent\n\n") + if title := strings.TrimSpace(pr.Title); title != "" { + out.WriteString("Title: ") + out.WriteString(title) + out.WriteString("\n\n") + } + if body := strings.TrimSpace(pr.Body); body != "" { + out.WriteString(body) + out.WriteString("\n\n") + } else { + out.WriteString("No PR body provided.\n\n") + } + if pr.URL != "" { + out.WriteString("URL: ") + out.WriteString(pr.URL) + out.WriteString("\n") + } + if pr.Author != "" { + out.WriteString("Author: ") + out.WriteString(pr.Author) + out.WriteString("\n") + } + out.WriteString("Review SHAs: ") + out.WriteString(shortSHAOrValue(pr.ReviewBaseSHA)) + out.WriteString(" -> ") + out.WriteString(shortSHAOrValue(pr.ReviewHeadSHA)) + out.WriteString("\n") + if pr.PinnedReview { + out.WriteString("Pinned review: true\n") + } + return out.String() +} + +func renderDossierChangeMap(files []dossierChangedFileArtifact) string { + var out strings.Builder + out.WriteString("# Change Map\n\n") + if len(files) == 0 { + out.WriteString("No changed files.\n") + return out.String() + } + for _, file := range files { + out.WriteString("- ") + out.WriteString(file.Status) + out.WriteString(": ") + out.WriteString(file.Path) + if file.OldPath != "" { + out.WriteString(" (from ") + out.WriteString(file.OldPath) + out.WriteString(")") + } + fmt.Fprintf(&out, " [+%d -%d]", file.Additions, file.Deletions) + if file.Binary { + out.WriteString(" binary") + } + out.WriteString("\n") + } + return out.String() +} + +func renderDossierRepoGuidance(repo dossierRepoContextArtifact) string { + var out strings.Builder + out.WriteString("# Repo Guidance\n\n") + out.WriteString("No dedicated repo review-guidance source is defined yet.\n\n") + if repo.RepoInfo != nil { + out.WriteString("Repo-local agent provenance: ") + out.WriteString(repo.RepoInfo.Provenance) + out.WriteString("\n\n") + if note := strings.TrimSpace(repo.RepoInfo.TrustNote()); note != "" { + out.WriteString(note) + out.WriteString("\n") + } + } + return out.String() +} + +func renderDossierDiscussion(discussion dossierDiscussionArtifact) string { + var out strings.Builder + out.WriteString("# Discussion\n\n") + if discussion.PinnedReview { + note := strings.TrimSpace(discussion.DiscussionOmittedNote) + if note == "" { + note = "Current PR discussion omitted for pinned review." + } + out.WriteString(note) + out.WriteString("\n") + return out.String() + } + out.WriteString("## Top-level comments\n\n") + comments := reviewerFacingTopLevelComments(discussion.TopLevelComments) + if len(comments) == 0 { + out.WriteString("None.\n\n") + } else { + for _, comment := range capTopLevelComments(comments) { + out.WriteString("- ") + if comment.Kind != "" { + out.WriteString(comment.Kind) + out.WriteString(" ") + } + if comment.Author != "" { + out.WriteString("by ") + out.WriteString(comment.Author) + } + out.WriteString(": ") + out.WriteString(singleLineExcerpt(comment.Body, dossierFinalExcerptRunes)) + out.WriteString("\n") + } + if len(comments) > dossierFinalMaxTopLevelComments { + fmt.Fprintf(&out, "\nAdditional top-level comments omitted: %d\n", len(comments)-dossierFinalMaxTopLevelComments) + } + out.WriteString("\n") + } + out.WriteString("## Inline threads\n\n") + if len(discussion.InlineThreads) == 0 { + out.WriteString("None.\n") + return out.String() + } + for _, thread := range capInlineThreads(discussion.InlineThreads) { + out.WriteString("- ") + out.WriteString(thread.Path) + if thread.Line > 0 { + fmt.Fprintf(&out, ":%d", thread.Line) + } + if thread.Side != "" { + out.WriteString(" [") + out.WriteString(thread.Side) + out.WriteString("]") + } + if thread.AnchorKind != "" { + out.WriteString(" {") + out.WriteString(thread.AnchorKind) + out.WriteString("}") + } + if thread.Resolved { + out.WriteString(" resolved") + } + out.WriteString("\n") + for _, comment := range capThreadComments(thread.Comments) { + out.WriteString(" ") + if comment.Author != "" { + out.WriteString(comment.Author) + out.WriteString(": ") + } + out.WriteString(singleLineExcerpt(comment.Body, dossierFinalExcerptRunes)) + out.WriteString("\n") + } + if len(thread.Comments) > dossierFinalMaxThreadComments { + fmt.Fprintf(&out, " Additional thread comments omitted: %d\n", len(thread.Comments)-dossierFinalMaxThreadComments) + } + } + if len(discussion.InlineThreads) > dossierFinalMaxInlineThreads { + fmt.Fprintf(&out, "\nAdditional inline threads omitted: %d\n", len(discussion.InlineThreads)-dossierFinalMaxInlineThreads) + } + return out.String() +} + +func buildDossierIndex(dir string) (dossierIndexArtifact, error) { + var files []dossierIndexFileArtifact + root, err := os.OpenRoot(dir) + if err != nil { + return dossierIndexArtifact{}, fmt.Errorf("pipeline: open dossier root: %w", err) + } + defer root.Close() + err = fs.WalkDir(root.FS(), ".", func(path string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if entry.IsDir() { + return nil + } + if filepath.Base(path) == "index.json" { + return nil + } + data, err := root.ReadFile(path) + if err != nil { + return err + } + files = append(files, dossierIndexFileArtifact{ + Path: filepath.ToSlash(path), + SHA256: sha256Hex(data), + }) + return nil + }) + if err != nil { + return dossierIndexArtifact{}, fmt.Errorf("pipeline: build dossier index: %w", err) + } + sort.Slice(files, func(i, j int) bool { return files[i].Path < files[j].Path }) + return dossierIndexArtifact{HashAlgorithm: "sha256", Files: files}, nil +} + +func writeJSONFile(path string, payload any) error { + data, err := json.MarshalIndent(payload, "", " ") + if err != nil { + return err + } + if err := os.WriteFile(path, append(data, '\n'), 0o600); err != nil { + return fmt.Errorf("pipeline: write dossier artifact %s: %w", filepath.Base(path), err) + } + return nil +} + +func diffStats(patch string) (int, int) { + additions := 0 + deletions := 0 + for _, line := range splitLines(patch) { + switch { + case strings.HasPrefix(line, "+++ "), strings.HasPrefix(line, "--- "): + continue + case strings.HasPrefix(line, "+"): + additions++ + case strings.HasPrefix(line, "-"): + deletions++ + } + } + return additions, deletions +} + +func filePatchStatus(patch FilePatch) string { + switch { + case patch.Deleted: + return "deleted" + case strings.Contains(patch.Patch, "new file mode") || strings.Contains(patch.Patch, "--- /dev/null"): + return "added" + case patch.OldPath != "" && patch.Path != "" && patch.OldPath != patch.Path: + return "renamed" + default: + return "modified" + } +} + +func oldPathIfDifferent(oldPath, path string) string { + oldPath = strings.TrimSpace(oldPath) + path = strings.TrimSpace(path) + if oldPath == "" || oldPath == path { + return "" + } + return oldPath +} + +func singleLine(value string) string { + value = strings.TrimSpace(value) + value = strings.Join(strings.Fields(value), " ") + if value == "" { + return "(empty)" + } + return value +} + +func singleLineExcerpt(value string, maxRunes int) string { + value = singleLine(value) + if maxRunes <= 0 { + return value + } + runes := []rune(value) + if len(runes) <= maxRunes { + return value + } + return string(runes[:maxRunes]) + "..." +} + +func reviewerFacingTopLevelComments(all []dossierTopLevelCommentArtifact) []dossierTopLevelCommentArtifact { + out := make([]dossierTopLevelCommentArtifact, 0, len(all)) + for _, comment := range all { + if comment.Kind == "review" && comment.Event == string(review.ReviewEventApprove) { + continue + } + out = append(out, comment) + } + return out +} + +func capTopLevelComments(all []dossierTopLevelCommentArtifact) []dossierTopLevelCommentArtifact { + if len(all) <= dossierFinalMaxTopLevelComments { + return all + } + return all[:dossierFinalMaxTopLevelComments] +} + +func capInlineThreads(all []dossierInlineThreadArtifact) []dossierInlineThreadArtifact { + if len(all) <= dossierFinalMaxInlineThreads { + return all + } + return all[:dossierFinalMaxInlineThreads] +} + +func capThreadComments(all []dossierThreadCommentArtifact) []dossierThreadCommentArtifact { + if len(all) <= dossierFinalMaxThreadComments { + return all + } + return all[:dossierFinalMaxThreadComments] +} + +func sha256Hex(data []byte) string { + sum := sha256.Sum256(data) + return hex.EncodeToString(sum[:]) +} + +func shortSHAOrValue(value string) string { + value = strings.TrimSpace(value) + if len(value) > 12 { + return value[:12] + } + if value == "" { + return "unknown" + } + return value +} + func agentSourcesArtifactFromCatalog(catalog agents.Catalog, reviewerRuntime map[string]reviewerRuntimeResolution) agentSourcesArtifact { artifact := agentSourcesArtifact{ Sources: append([]agents.SourceInfo(nil), catalog.Sources...), @@ -2762,6 +3450,7 @@ func ArtifactPathsForRun(layout statepaths.Layout, ref gitprovider.PRRef, pr git AgentSourcesJSON: filepath.Join(dir, "agent-sources.json"), AgentLogsDir: filepath.Join(dir, "agent-logs"), LLMTasksDir: filepath.Join(dir, "llm-tasks"), + DossierDir: filepath.Join(dir, "dossier"), }, nil } @@ -2776,6 +3465,7 @@ func ArtifactPathsFromDir(dir string) ArtifactPaths { AgentSourcesJSON: filepath.Join(dir, "agent-sources.json"), AgentLogsDir: filepath.Join(dir, "agent-logs"), LLMTasksDir: filepath.Join(dir, "llm-tasks"), + DossierDir: filepath.Join(dir, "dossier"), } } diff --git a/internal/pipeline/pipeline_test.go b/internal/pipeline/pipeline_test.go index ed57cca..0a06d95 100644 --- a/internal/pipeline/pipeline_test.go +++ b/internal/pipeline/pipeline_test.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "io/fs" "os" "path/filepath" "reflect" @@ -31,6 +32,36 @@ func TestDryRunPlansAndPersistsWithoutProviderWrites(t *testing.T) { store := openPipelineStore(t) defer closeStore(t, store) provider, req := dryRunHarness(t) + provider.pr.Body = "Document the checkout-native review contract." + provider.threads = []gitprovider.InlineThread{{ + ID: "thread-1", + Resolved: false, + Path: "main.go", + Side: review.DiffSideRight, + Line: 2, + SubjectType: review.AnchorKindLine, + Comments: []gitprovider.ThreadComment{{ + ID: "comment-1", + Body: "Inline concern", + Author: gitprovider.Identity{Login: "reviewer"}, + }}, + }} + provider.issueComments = []gitprovider.IssueComment{{ + ID: "issue-1", + Body: "Top-level concern", + Author: gitprovider.Identity{Login: "maintainer"}, + }} + provider.reviews = []gitprovider.Review{{ + ID: "review-1", + Body: "Review body", + Author: gitprovider.Identity{Login: "architect"}, + Event: review.ReviewEventComment, + }, { + ID: "review-2", + Body: "Approved body should stay out of reviewer-facing discussion", + Author: gitprovider.Identity{Login: "approver"}, + Event: review.ReviewEventApprove, + }} adapter := &llm.FakeAdapter{ NameValue: "fake-llm", QuotaValue: llm.Quota{BlockRemainingPct: 87, WeeklyRemainingPct: 64}, @@ -133,6 +164,14 @@ func TestDryRunPlansAndPersistsWithoutProviderWrites(t *testing.T) { assertFileContains(t, result.Artifacts.FindingsJSON, `"severity": "major"`) assertFileContains(t, result.Artifacts.RollupMarkdown, "Automated PR Review") assertAgentSourcesArtifact(t, result.Artifacts.AgentSourcesJSON, "harness:reviewer") + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "final", "pr-intent.md"), "Document the checkout-native review contract.") + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "final", "discussion.md"), "main.go:2") + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "final", "discussion.md"), "Top-level concern") + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "final", "discussion.md"), "Review body") + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "final", "repo-guidance.md"), "No dedicated repo review-guidance source is defined yet.") + assertDossierIndexArtifact(t, result.Artifacts.DossierDir, "final/discussion.md") + assertFileOmits(t, filepath.Join(result.Artifacts.DossierDir, "final", "discussion.md"), "provider_session_id", "session_row_id", "mergeability", "approval", "CI status", "Approved body should stay out of reviewer-facing discussion") + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "raw", "top-level-comments.json"), "Approved body should stay out of reviewer-facing discussion") slicePath, err := result.Artifacts.SlicePatch("harness:reviewer", "main.go") if err != nil { t.Fatalf("SlicePatch: %v", err) @@ -260,6 +299,9 @@ func TestDryRunWithPinnedReviewSHAsUsesCompareDiffAndPinnedFileRefs(t *testing.T if provider.threadCalls != 0 { t.Fatalf("thread calls = %d, want no live thread reads for pinned review", provider.threadCalls) } + if provider.reviewCalls != 0 || provider.issueCommentCalls != 0 { + t.Fatalf("review/comment calls = %d/%d, want no live discussion reads for pinned review", provider.reviewCalls, provider.issueCommentCalls) + } if !containsFileCall(provider.treeCalls, fileKey{gitRef: currentBaseSHA, path: ".codereview/agents"}) { t.Fatalf("tree calls = %#v, want repo agents loaded from current base SHA", provider.treeCalls) } @@ -267,6 +309,7 @@ func TestDryRunWithPinnedReviewSHAsUsesCompareDiffAndPinnedFileRefs(t *testing.T t.Fatalf("tree calls = %#v, want no repo agent load from pinned review base SHA", provider.treeCalls) } assertFileContains(t, result.Artifacts.DiffPatch, "diff --git a/main.go b/main.go") + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "final", "discussion.md"), "Current PR discussion omitted because this review is pinned to explicit base/head SHAs.") } func TestDryRunWithPinnedReviewSHAsRejectsForkHeads(t *testing.T) { @@ -414,6 +457,7 @@ func TestSelectionOnlyRunsSingleSelectionPhaseWithoutReviewArtifacts(t *testing. if result.SelectionSession.ProviderSessionID != "selection-session" || result.SelectionSession.Model != "claude-sonnet-4-6" || result.SelectionSession.Effort != "medium" { t.Fatalf("selection session = %#v, want selection-session claude-sonnet-4-6/medium", result.SelectionSession) } + assertDossierIndexArtifact(t, result.Artifacts.DossierDir, "final/change-map.md") expectedLog, err := expectedArtifacts.AgentLog("orchestrator-selection") if err != nil { t.Fatalf("AgentLog: %v", err) @@ -431,6 +475,8 @@ func TestSelectionOnlyRunsSingleSelectionPhaseWithoutReviewArtifacts(t *testing. if _, err := os.Stat(result.Artifacts.RollupMarkdown); !errors.Is(err, os.ErrNotExist) { t.Fatalf("rollup artifact stat error = %v, want not exist", err) } + assertFileContains(t, filepath.Join(result.Artifacts.DossierDir, "final", "discussion.md"), "main.go:2") + assertDossierIndexArtifact(t, result.Artifacts.DossierDir, "raw/pr-context.json") } func TestSelectionOnlyPromptPreservesRoutingContractWithoutReviewerPromptBodies(t *testing.T) { @@ -451,6 +497,7 @@ func TestSelectionOnlyPromptPreservesRoutingContractWithoutReviewerPromptBodies( req.Profile.AgentSources = []string{dir} req.SelectionPromptInstructions = "Prefer applies_when over prompt wording when routing." provider.diff.Raw = smallDiff("main.go") + smallDiff("other.go") + provider.pr.Body = "Selection prompt body should stay out of prompt payloads." adapter := &llm.FakeAdapter{NameValue: "fake-llm"} adapter.Queue(fakeLLMResult("selection-session", selectionJSON("harness:alpha", "main.go"), 10, 2)) @@ -472,6 +519,7 @@ func TestSelectionOnlyPromptPreservesRoutingContractWithoutReviewerPromptBodies( Schema string `json:"schema"` SelectionInstructions string `json:"selection_instructions"` OutputContract map[string]any `json:"output_contract"` + PR promptPR `json:"pr"` Agents []selectionAgentPrompt `json:"agents"` ChangedFiles []string `json:"changed_files"` Threads []gitprovider.InlineThread `json:"threads"` @@ -485,6 +533,10 @@ func TestSelectionOnlyPromptPreservesRoutingContractWithoutReviewerPromptBodies( if payload.Task != defaultSelectionTask || payload.Schema != "selection" || payload.OutputContract == nil { t.Fatalf("selection prompt envelope = %#v, want task/schema/output contract", payload) } + if payload.PR.Title != provider.pr.Title || payload.PR.URL != provider.pr.URL || payload.PR.Author.Login != provider.pr.Author.Login || + payload.PR.Base.SHA != provider.pr.Base.SHA || payload.PR.Head.SHA != provider.pr.Head.SHA { + t.Fatalf("selection prompt pr = %#v, want title/url/author/base/head from provider PR", payload.PR) + } if !reflect.DeepEqual(payload.ChangedFiles, []string{"main.go", "other.go"}) { t.Fatalf("changed files = %#v, want main.go/other.go", payload.ChangedFiles) } @@ -516,6 +568,9 @@ func TestSelectionOnlyPromptPreservesRoutingContractWithoutReviewerPromptBodies( t.Fatalf("selection prompt leaked reviewer execution detail %q: %s", forbidden, selectionPrompt) } } + if strings.Contains(selectionPrompt, "Selection prompt body should stay out of prompt payloads.") { + t.Fatalf("selection prompt leaked PR body: %s", selectionPrompt) + } } func TestSelectionOnlyRejectsInvalidSelection(t *testing.T) { @@ -2844,7 +2899,7 @@ func TestFindingsOutputContractScopesAnchorToFindingItems(t *testing.T) { } func TestRollupPromptPreservesLocationForDedupeWithoutRawAnchors(t *testing.T) { - prompt, err := buildRollupPrompt(gitprovider.PR{}, []review.Finding{ + prompt, err := buildRollupPrompt(gitprovider.PR{Body: "Rollup prompt body should stay out of prompt payloads."}, []review.Finding{ { ID: "finding-1", Severity: review.SeverityMajor, @@ -2879,6 +2934,9 @@ func TestRollupPromptPreservesLocationForDedupeWithoutRawAnchors(t *testing.T) { if strings.Contains(prompt, `"anchor"`) { t.Fatalf("rollup prompt leaked raw anchor key: %s", prompt) } + if strings.Contains(prompt, "Rollup prompt body should stay out of prompt payloads.") { + t.Fatalf("rollup prompt leaked PR body: %s", prompt) + } } func TestDryRunContextBudgetFailures(t *testing.T) { @@ -3043,19 +3101,23 @@ func TestSessionRowIDForFindingRequiresReviewerSession(t *testing.T) { } type readOnlyProvider struct { - pr gitprovider.PR - diff gitprovider.UnifiedDiff - diffCalls int - diffBetween gitprovider.UnifiedDiff - diffBetweenCalls []shaPair - files map[fileKey][]byte - fileCalls []fileKey - trees map[fileKey][]gitprovider.TreeEntry - treeCalls []fileKey - threads []gitprovider.InlineThread - threadCalls int - caps gitprovider.ProviderCaps - onGetPR func() + pr gitprovider.PR + diff gitprovider.UnifiedDiff + diffCalls int + diffBetween gitprovider.UnifiedDiff + diffBetweenCalls []shaPair + files map[fileKey][]byte + fileCalls []fileKey + trees map[fileKey][]gitprovider.TreeEntry + treeCalls []fileKey + threads []gitprovider.InlineThread + reviews []gitprovider.Review + issueComments []gitprovider.IssueComment + threadCalls int + reviewCalls int + issueCommentCalls int + caps gitprovider.ProviderCaps + onGetPR func() } type shaPair struct { @@ -3335,6 +3397,16 @@ func (p *readOnlyProvider) ListInlineThreads(context.Context, gitprovider.PRRef) return append([]gitprovider.InlineThread(nil), p.threads...), nil } +func (p *readOnlyProvider) ListReviews(context.Context, gitprovider.PRRef) ([]gitprovider.Review, error) { + p.reviewCalls++ + return append([]gitprovider.Review(nil), p.reviews...), nil +} + +func (p *readOnlyProvider) ListIssueComments(context.Context, gitprovider.PRRef) ([]gitprovider.IssueComment, error) { + p.issueCommentCalls++ + return append([]gitprovider.IssueComment(nil), p.issueComments...), nil +} + func (p *readOnlyProvider) Capabilities() gitprovider.ProviderCaps { return p.caps } @@ -3356,6 +3428,7 @@ func dryRunHarness(t *testing.T) (*readOnlyProvider, Request) { pr := gitprovider.PR{ Ref: ref, Title: "CR-20 dry-run", + Body: "Default PR body.", URL: prURL(ref), State: gitprovider.PRStateOpen, Author: gitprovider.Identity{Login: "author", ID: "author-id"}, @@ -3819,6 +3892,84 @@ func assertReviewerRuntimeArtifact(t *testing.T, path, wantAgent string, want re t.Fatalf("artifact agents = %#v, want reviewer runtime for %s", artifact.Agents, wantAgent) } +func assertDossierIndexArtifact(t *testing.T, dir, wantPath string) { + t.Helper() + data, err := os.ReadFile(filepath.Join(dir, "index.json")) // #nosec G304 -- test reads artifact path under t.TempDir. + if err != nil { + t.Fatalf("ReadFile(dossier index): %v", err) + } + var index dossierIndexArtifact + if err := json.Unmarshal(data, &index); err != nil { + t.Fatalf("Unmarshal dossier index: %v\n%s", err, data) + } + if index.HashAlgorithm != "sha256" { + t.Fatalf("hash algorithm = %q, want sha256", index.HashAlgorithm) + } + if len(index.Files) == 0 { + t.Fatal("dossier index files = 0, want artifacts") + } + wantHashes := map[string]string{} + root, err := os.OpenRoot(dir) + if err != nil { + t.Fatalf("OpenRoot(%s): %v", dir, err) + } + defer root.Close() + err = fs.WalkDir(root.FS(), ".", func(path string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if entry.IsDir() || filepath.Base(path) == "index.json" { + return nil + } + fileData, err := root.ReadFile(path) + if err != nil { + return err + } + wantHashes[filepath.ToSlash(path)] = sha256Hex(fileData) + return nil + }) + if err != nil { + t.Fatalf("WalkDir(dossier): %v", err) + } + var saw bool + for _, file := range index.Files { + if file.Path == wantPath { + saw = true + } + if file.Path == "" || file.SHA256 == "" { + t.Fatalf("index file = %#v, want non-empty path/hash", file) + } + wantHash, ok := wantHashes[file.Path] + if !ok { + t.Fatalf("index file = %#v, want tracked dossier artifact", file) + } + if file.SHA256 != wantHash { + t.Fatalf("index hash for %s = %q, want %q", file.Path, file.SHA256, wantHash) + } + delete(wantHashes, file.Path) + } + if !saw { + t.Fatalf("dossier index files = %#v, want %q", index.Files, wantPath) + } + if len(wantHashes) != 0 { + t.Fatalf("dossier index missing files = %#v", wantHashes) + } +} + +func assertFileOmits(t *testing.T, path string, forbidden ...string) { + t.Helper() + data, err := os.ReadFile(path) // #nosec G304 -- test reads artifact path returned by pipeline under t.TempDir. + if err != nil { + t.Fatalf("ReadFile(%s): %v", path, err) + } + text := string(data) + for _, needle := range forbidden { + if strings.Contains(text, needle) { + t.Fatalf("artifact %s contains forbidden substring %q:\n%s", path, needle, text) + } + } +} + func findArtifactSource(sources []agents.SourceInfo, kind agents.SourceKind) (agents.SourceInfo, bool) { for _, source := range sources { if source.Kind == kind { @@ -3917,6 +4068,14 @@ func (p *failingProvider) ListInlineThreads(context.Context, gitprovider.PRRef) return nil, p.err } +func (p *failingProvider) ListReviews(context.Context, gitprovider.PRRef) ([]gitprovider.Review, error) { + return nil, p.err +} + +func (p *failingProvider) ListIssueComments(context.Context, gitprovider.PRRef) ([]gitprovider.IssueComment, error) { + return nil, p.err +} + func (p *failingProvider) Capabilities() gitprovider.ProviderCaps { return gitprovider.ProviderCaps{} }