diff --git a/cmd/roborev/insights.go b/cmd/roborev/insights.go new file mode 100644 index 000000000..2a772200f --- /dev/null +++ b/cmd/roborev/insights.go @@ -0,0 +1,241 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "github.com/roborev-dev/roborev/internal/daemon" + "github.com/roborev-dev/roborev/internal/git" + "github.com/roborev-dev/roborev/internal/storage" + "github.com/spf13/cobra" +) + +func insightsCmd() *cobra.Command { + var ( + repoPath string + branch string + since string + agentName string + model string + reasoning string + wait bool + jsonOutput bool + ) + + cmd := &cobra.Command{ + Use: "insights", + Short: "Analyze review patterns and suggest guideline improvements", + Long: `Analyze failing code reviews to identify recurring patterns and suggest +improvements to review guidelines. + +This is an LLM-powered command that: +1. Queries completed reviews (focusing on failures) from the database +2. Includes the current review_guidelines from .roborev.toml as context +3. Sends the batch to an agent with a structured analysis prompt +4. 
Returns actionable recommendations for guideline changes + +The agent produces: +- Recurring finding patterns across reviews +- Hotspot areas (files/packages that concentrate failures) +- Noise candidates (findings consistently dismissed without code changes) +- Guideline gaps (patterns flagged by reviews but not in guidelines) +- Suggested guideline additions (concrete text for .roborev.toml) + +Examples: + roborev insights # Analyze last 30 days of reviews + roborev insights --since 7d # Last 7 days only + roborev insights --branch main # Only reviews on main branch + roborev insights --repo /path/to/repo # Specific repo + roborev insights --agent gemini --wait # Use specific agent, wait for result + roborev insights --json # Output job info as JSON`, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + return runInsights(cmd, insightsOptions{ + repoPath: repoPath, + branch: branch, + since: since, + agentName: agentName, + model: model, + reasoning: reasoning, + wait: wait, + jsonOutput: jsonOutput, + }) + }, + } + + cmd.Flags().StringVar(&repoPath, "repo", "", "scope to a single repo (default: current repo if tracked)") + cmd.Flags().StringVar(&branch, "branch", "", "scope to a single branch") + cmd.Flags().StringVar(&since, "since", "30d", "time window for reviews (e.g., 7d, 30d, 90d)") + cmd.Flags().StringVar(&agentName, "agent", "", "agent to use for analysis (default: from config)") + cmd.Flags().StringVar(&model, "model", "", "model for agent") + cmd.Flags().StringVar(&reasoning, "reasoning", "", "reasoning level: fast, standard, or thorough") + cmd.Flags().BoolVar(&wait, "wait", true, "wait for completion and display result") + cmd.Flags().BoolVar(&jsonOutput, "json", false, "output job info as JSON") + registerAgentCompletion(cmd) + registerReasoningCompletion(cmd) + + return cmd +} + +type insightsOptions struct { + repoPath string + branch string + since string + agentName string + model string + reasoning string + wait bool 
+ jsonOutput bool +} + +func runInsights(cmd *cobra.Command, opts insightsOptions) error { + repoPath := opts.repoPath + if repoPath == "" { + workDir, err := os.Getwd() + if err != nil { + return fmt.Errorf("get working directory: %w", err) + } + repoPath = workDir + } else { + var err error + repoPath, err = filepath.Abs(repoPath) + if err != nil { + return fmt.Errorf("resolve repo path: %w", err) + } + } + + if _, err := git.GetRepoRoot(repoPath); err != nil { + if opts.repoPath == "" { + return fmt.Errorf("not in a git repository (use --repo to specify one)") + } + return fmt.Errorf("--repo %q is not a git repository", opts.repoPath) + } + + // Parse --since duration + sinceTime, err := parseSinceDuration(opts.since) + if err != nil { + return fmt.Errorf("invalid --since value %q: %w", opts.since, err) + } + + // Ensure daemon is running + if err := ensureDaemon(); err != nil { + return err + } + + if !opts.jsonOutput { + cmd.Printf("Queueing insights analysis since %s...\n", sinceTime.Format("2006-01-02")) + } + + reqBody, _ := json.Marshal(daemon.EnqueueRequest{ + RepoPath: repoPath, + GitRef: "insights", + Branch: opts.branch, + Since: sinceTime.Format(time.RFC3339), + Agent: opts.agentName, + Model: opts.model, + Reasoning: opts.reasoning, + JobType: storage.JobTypeInsights, + }) + + ep := getDaemonEndpoint() + resp, err := ep.HTTPClient(30*time.Second).Post(ep.BaseURL()+"/api/enqueue", "application/json", bytes.NewReader(reqBody)) + if err != nil { + return fmt.Errorf("failed to connect to daemon: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode == http.StatusOK { + var skipped struct { + Skipped bool `json:"skipped"` + Reason string `json:"reason"` + } + if err := json.Unmarshal(body, &skipped); err == nil && skipped.Skipped { + if opts.jsonOutput { + enc := json.NewEncoder(cmd.OutOrStdout()) + return 
enc.Encode(map[string]any{ + "skipped": true, + "reason": skipped.Reason, + "since": sinceTime.Format(time.RFC3339), + }) + } + cmd.Println(skipped.Reason) + return nil + } + } + + if resp.StatusCode != http.StatusCreated { + return fmt.Errorf("enqueue failed: %s", body) + } + + var job storage.ReviewJob + if err := json.Unmarshal(body, &job); err != nil { + return fmt.Errorf("failed to parse response: %w", err) + } + + // JSON output mode + if opts.jsonOutput { + result := map[string]any{ + "job_id": job.ID, + "agent": job.Agent, + "since": sinceTime.Format(time.RFC3339), + } + enc := json.NewEncoder(cmd.OutOrStdout()) + return enc.Encode(result) + } + + cmd.Printf("Enqueued insights job %d (agent: %s)\n", job.ID, job.Agent) + + // Wait for completion + if opts.wait { + return waitForPromptJob(cmd, ep, job.ID, false, promptPollInterval) + } + + return nil +} + +// parseSinceDuration parses a duration string like "7d", "30d", "90d" into a time.Time. +func parseSinceDuration(s string) (time.Time, error) { + s = strings.TrimSpace(s) + if s == "" { + return time.Now().AddDate(0, 0, -30), nil + } + + // Try standard Go duration first (e.g., "720h") + if d, err := time.ParseDuration(s); err == nil { + if d <= 0 { + return time.Time{}, + fmt.Errorf("duration must be positive, got %s", s) + } + return time.Now().Add(-d), nil + } + + // Parse day-based durations (e.g., "7d", "30d") + if strings.HasSuffix(s, "d") { + var days int + if _, err := fmt.Sscanf(s, "%dd", &days); err == nil && days > 0 { + return time.Now().AddDate(0, 0, -days), nil + } + } + + // Parse week-based durations (e.g., "2w", "4w") + if strings.HasSuffix(s, "w") { + var weeks int + if _, err := fmt.Sscanf(s, "%dw", &weeks); err == nil && weeks > 0 { + return time.Now().AddDate(0, 0, -weeks*7), nil + } + } + + return time.Time{}, fmt.Errorf("expected format like 7d, 4w, or 720h") +} diff --git a/cmd/roborev/insights_test.go b/cmd/roborev/insights_test.go new file mode 100644 index 000000000..c9a410237 
--- /dev/null +++ b/cmd/roborev/insights_test.go @@ -0,0 +1,136 @@ +package main + +import ( + "bytes" + "encoding/json" + "net/http" + "testing" + "time" + + "github.com/roborev-dev/roborev/internal/daemon" + "github.com/roborev-dev/roborev/internal/storage" + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseSinceDuration(t *testing.T) { + t.Parallel() + + tests := []struct { + input string + wantErr bool + checkFn func(t *testing.T, got time.Time) + }{ + { + input: "7d", + checkFn: func(t *testing.T, got time.Time) { + t.Helper() + expected := time.Now().AddDate(0, 0, -7) + assertDurationApprox(t, expected, got, "7d") + }, + }, + { + input: "30d", + checkFn: func(t *testing.T, got time.Time) { + t.Helper() + expected := time.Now().AddDate(0, 0, -30) + assertDurationApprox(t, expected, got, "30d") + }, + }, + { + input: "2w", + checkFn: func(t *testing.T, got time.Time) { + t.Helper() + expected := time.Now().AddDate(0, 0, -14) + assertDurationApprox(t, expected, got, "2w") + }, + }, + { + input: "720h", + checkFn: func(t *testing.T, got time.Time) { + t.Helper() + expected := time.Now().Add(-720 * time.Hour) + assertDurationApprox(t, expected, got, "720h") + }, + }, + { + input: "", + checkFn: func(t *testing.T, got time.Time) { + t.Helper() + expected := time.Now().AddDate(0, 0, -30) + assertDurationApprox(t, expected, got, "empty") + }, + }, + {input: "invalid", wantErr: true}, + {input: "0d", wantErr: true}, + {input: "-5d", wantErr: true}, + {input: "-5h", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := parseSinceDuration(tt.input) + if tt.wantErr { + require.Error(t, err) + return + } + + require.NoError(t, err) + if tt.checkFn != nil { + tt.checkFn(t, got) + } + }) + } +} + +func TestRunInsights_EnqueuesInsightsJob(t *testing.T) { + repo := NewGitTestRepo(t) + repo.CommitFile("README.md", "hello\n", "init") + 
repo.Run("checkout", "-b", "feature") + + var enqueued daemon.EnqueueRequest + mux := http.NewServeMux() + mux.HandleFunc("/api/enqueue", func(w http.ResponseWriter, r *http.Request) { + err := json.NewDecoder(r.Body).Decode(&enqueued) + assert.NoError(t, err) + w.WriteHeader(http.StatusCreated) + writeJSON(w, storage.ReviewJob{ + ID: 99, + Agent: "codex", + Status: storage.JobStatusQueued, + }) + }) + + daemonFromHandler(t, mux) + + cmd := &cobra.Command{} + cmd.SetOut(&bytes.Buffer{}) + cmd.SetErr(&bytes.Buffer{}) + + err := runInsights(cmd, insightsOptions{ + repoPath: repo.Dir, + branch: "main", + since: "7d", + wait: false, + }) + require.NoError(t, err) + + require.Equal(t, storage.JobTypeInsights, enqueued.JobType) + require.Equal(t, "insights", enqueued.GitRef) + assert.Equal(t, "main", enqueued.Branch) + assert.Empty(t, enqueued.CustomPrompt) + + since, err := time.Parse(time.RFC3339, enqueued.Since) + require.NoError(t, err) + assert.WithinDuration(t, time.Now().AddDate(0, 0, -7), since, 2*time.Second) +} + +func assertDurationApprox( + t *testing.T, expected, got time.Time, label string, +) { + t.Helper() + diff := got.Sub(expected) + assert.LessOrEqual(t, diff, time.Second, "%s upper bound", label) + assert.GreaterOrEqual(t, diff, -time.Second, "%s lower bound", label) +} diff --git a/cmd/roborev/main.go b/cmd/roborev/main.go index 42232f62d..127730a2e 100644 --- a/cmd/roborev/main.go +++ b/cmd/roborev/main.go @@ -43,6 +43,7 @@ func main() { rootCmd.AddCommand(refineCmd()) rootCmd.AddCommand(runCmd()) rootCmd.AddCommand(analyzeCmd()) + rootCmd.AddCommand(insightsCmd()) rootCmd.AddCommand(fixCmd()) rootCmd.AddCommand(compactCmd()) rootCmd.AddCommand(promptCmd()) // hidden alias for backward compatibility diff --git a/cmd/roborev/main_test.go b/cmd/roborev/main_test.go index 197528c24..3740b001f 100644 --- a/cmd/roborev/main_test.go +++ b/cmd/roborev/main_test.go @@ -439,7 +439,7 @@ func TestRefinePendingJobWaitDoesNotConsumeIteration(t *testing.T) { 
repoDir, commitSHA := setupRefineRepo(t) // Track how many times the job has been polled - var pollCount int32 + var pollCount atomic.Int32 handleGetJobs := func(w http.ResponseWriter, r *http.Request, s *mockRefineState) bool { q := r.URL.Query() @@ -454,7 +454,7 @@ func TestRefinePendingJobWaitDoesNotConsumeIteration(t *testing.T) { return true } // On first poll, job is still Running; on subsequent polls, transition to Done - count := atomic.AddInt32(&pollCount, 1) + count := pollCount.Add(1) if count > 1 { job.Status = storage.JobStatusDone } @@ -541,7 +541,7 @@ func TestRefinePendingJobWaitDoesNotConsumeIteration(t *testing.T) { require.NoError(t, err, "expected refine to succeed (pending wait should not consume iteration), got: %v") // Verify the job was actually polled multiple times (proving we waited) - assert.GreaterOrEqual(t, atomic.LoadInt32(&pollCount), int32(2)) + assert.GreaterOrEqual(t, pollCount.Load(), int32(2)) } // ============================================================================ diff --git a/internal/daemon/client_test.go b/internal/daemon/client_test.go index d069fb8c1..8e341a8b5 100644 --- a/internal/daemon/client_test.go +++ b/internal/daemon/client_test.go @@ -114,7 +114,7 @@ func TestHTTPClientMarkReviewClosed(t *testing.T) { func TestHTTPClientWaitForReviewUsesJobID(t *testing.T) { assert := assert.New(t) - var reviewCalls int32 + var reviewCalls atomic.Int32 client := mockAPI(t, func(w http.ResponseWriter, r *http.Request) { switch { @@ -126,7 +126,7 @@ func TestHTTPClientWaitForReviewUsesJobID(t *testing.T) { case r.URL.Path == "/api/review" && r.Method == http.MethodGet: assertQuery(t, r, "job_id", "1") assertQuery(t, r, "sha", "") - if atomic.AddInt32(&reviewCalls, 1) == 1 { + if reviewCalls.Add(1) == 1 { w.WriteHeader(http.StatusNotFound) return } @@ -141,7 +141,7 @@ func TestHTTPClientWaitForReviewUsesJobID(t *testing.T) { review, err := client.WaitForReview(1) require.NoError(t, err, "WaitForReview failed") 
assert.Equal("Review complete", review.Output) - assert.GreaterOrEqual(atomic.LoadInt32(&reviewCalls), int32(2), "expected review to be retried after 404") + assert.GreaterOrEqual(reviewCalls.Load(), int32(2), "expected review to be retried after 404") } func TestFindJobForCommit(t *testing.T) { diff --git a/internal/daemon/insights.go b/internal/daemon/insights.go new file mode 100644 index 000000000..1e29fb2c9 --- /dev/null +++ b/internal/daemon/insights.go @@ -0,0 +1,95 @@ +package daemon + +import ( + "database/sql" + "errors" + "fmt" + "path/filepath" + "time" + + "github.com/roborev-dev/roborev/internal/config" + "github.com/roborev-dev/roborev/internal/prompt" + "github.com/roborev-dev/roborev/internal/storage" +) + +const maxInsightsReviews = 100 + +func (s *Server) buildInsightsPrompt( + repoRoot, branch string, since time.Time, +) (string, int, error) { + reviews, err := s.fetchInsightsReviews(repoRoot, branch, since) + if err != nil { + return "", 0, err + } + if len(reviews) == 0 { + return "", 0, nil + } + + cfg := s.configWatcher.Config() + maxPromptSize := config.ResolveMaxPromptSize(repoRoot, cfg) + guidelines := prompt.LoadGuidelines(repoRoot) + + promptText := prompt.BuildInsightsPrompt(prompt.InsightsData{ + Reviews: reviews, + Guidelines: guidelines, + RepoName: filepath.Base(repoRoot), + Since: since, + MaxPromptSize: maxPromptSize, + }) + return promptText, len(reviews), nil +} + +func (s *Server) fetchInsightsReviews( + repoRoot, branch string, since time.Time, +) ([]prompt.InsightsReview, error) { + // Normalize to forward slashes to match how GetOrCreateRepo stores paths. + repoFilter := filepath.ToSlash(repoRoot) + + var listOpts []storage.ListJobsOption + if branch != "" { + listOpts = append(listOpts, storage.WithBranch(branch)) + } + + jobs, err := s.db.ListJobs(string(storage.JobStatusDone), repoFilter, 0, 0, listOpts...) 
+ if err != nil { + return nil, fmt.Errorf("list jobs: %w", err) + } + + reviews := make([]prompt.InsightsReview, 0, min(len(jobs), maxInsightsReviews)) + for _, job := range jobs { + if !job.IsReviewJob() { + continue + } + if job.FinishedAt != nil && job.FinishedAt.Before(since) { + continue + } + if job.Verdict == nil || *job.Verdict != "F" { + continue + } + + review, err := s.db.GetReviewByJobID(job.ID) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + continue + } + return nil, fmt.Errorf("get review for job %d: %w", job.ID, err) + } + + responses, err := s.db.GetCommentsForJob(job.ID) + if err != nil { + return nil, fmt.Errorf("get comments for job %d: %w", job.ID, err) + } + + reviews = append(reviews, prompt.InsightsReviewFromJob( + job, + review.Output, + responses, + review.Closed, + )) + if len(reviews) >= maxInsightsReviews { + break + } + } + + return reviews, nil +} diff --git a/internal/daemon/insights_test.go b/internal/daemon/insights_test.go new file mode 100644 index 000000000..fce6a8523 --- /dev/null +++ b/internal/daemon/insights_test.go @@ -0,0 +1,192 @@ +package daemon + +import ( + "net/http" + "net/http/httptest" + "path/filepath" + "testing" + "time" + + gitpkg "github.com/roborev-dev/roborev/internal/git" + "github.com/roborev-dev/roborev/internal/storage" + "github.com/roborev-dev/roborev/internal/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHandleEnqueueInsightsBuildsPromptServerSide(t *testing.T) { + server, db, tmpDir := newTestServer(t) + + repoDir := filepath.Join(tmpDir, "repo") + testutil.InitTestGitRepo(t, repoDir) + + mainRoot, err := gitpkg.GetMainRepoRoot(repoDir) + require.NoError(t, err) + + repo, err := db.GetOrCreateRepo(mainRoot) + require.NoError(t, err) + + included := enqueueCompletedInsightsReviewJob(t, db, repo.ID, "aaa111", "main", storage.JobTypeReview, failingInsightsOutput("Included finding")) + _, err = db.AddCommentToJob(included.ID, "user", 
"Intentional tradeoff") + require.NoError(t, err) + + enqueueCompletedInsightsReviewJob(t, db, repo.ID, "compact", "main", storage.JobTypeCompact, failingInsightsOutput("Compact finding")) + enqueueCompletedInsightsReviewJob(t, db, repo.ID, "bbb222", "feature", storage.JobTypeReview, failingInsightsOutput("Feature finding")) + oldJob := enqueueCompletedInsightsReviewJob(t, db, repo.ID, "ccc333", "main", storage.JobTypeReview, failingInsightsOutput("Old finding")) + + since := time.Now().Add(-24 * time.Hour) + _, err = db.Exec( + `UPDATE review_jobs SET finished_at = ? WHERE id = ?`, + since.Add(-time.Hour).Format(time.RFC3339), + oldJob.ID, + ) + require.NoError(t, err) + + req := testutil.MakeJSONRequest(t, http.MethodPost, "/api/enqueue", EnqueueRequest{ + RepoPath: repoDir, + GitRef: "insights", + Branch: "main", + Agent: "test", + JobType: storage.JobTypeInsights, + Since: since.Format(time.RFC3339), + }) + w := httptest.NewRecorder() + + server.handleEnqueue(w, req) + + require.Equal(t, http.StatusCreated, w.Code, w.Body.String()) + + var job storage.ReviewJob + testutil.DecodeJSON(t, w, &job) + assert.Equal(t, storage.JobTypeInsights, job.JobType) + assert.Equal(t, "main", job.Branch) + + stored, err := db.GetJobByID(job.ID) + require.NoError(t, err) + require.NotEmpty(t, stored.Prompt) + assert.Equal(t, storage.JobTypeInsights, stored.JobType) + assert.Contains(t, stored.Prompt, "Included finding") + assert.Contains(t, stored.Prompt, `- user: "Intentional tradeoff"`) + assert.NotContains(t, stored.Prompt, "Compact finding") + assert.NotContains(t, stored.Prompt, "Feature finding") + assert.NotContains(t, stored.Prompt, "Old finding") +} + +func TestHandleEnqueueInsightsSkipsWhenNoFailingReviewsMatch(t *testing.T) { + server, _, tmpDir := newTestServer(t) + + repoDir := filepath.Join(tmpDir, "repo") + testutil.InitTestGitRepo(t, repoDir) + + req := testutil.MakeJSONRequest(t, http.MethodPost, "/api/enqueue", EnqueueRequest{ + RepoPath: repoDir, + GitRef: 
"insights", + Agent: "test", + JobType: storage.JobTypeInsights, + Since: time.Now().Add(-24 * time.Hour).Format(time.RFC3339), + }) + w := httptest.NewRecorder() + + server.handleEnqueue(w, req) + + require.Equal(t, http.StatusOK, w.Code, w.Body.String()) + + var resp struct { + Skipped bool `json:"skipped"` + Reason string `json:"reason"` + } + testutil.DecodeJSON(t, w, &resp) + + assert.True(t, resp.Skipped) + assert.Contains(t, resp.Reason, "No failing reviews found") +} + +func TestHandleEnqueueInsightsNoBranchIncludesAllBranches(t *testing.T) { + server, db, tmpDir := newTestServer(t) + + repoDir := filepath.Join(tmpDir, "repo") + testutil.InitTestGitRepo(t, repoDir) + + mainRoot, err := gitpkg.GetMainRepoRoot(repoDir) + require.NoError(t, err) + + repo, err := db.GetOrCreateRepo(mainRoot) + require.NoError(t, err) + + // Create failing reviews on two different branches. + enqueueCompletedInsightsReviewJob( + t, db, repo.ID, "aaa111", "main", + storage.JobTypeReview, failingInsightsOutput("Main finding"), + ) + enqueueCompletedInsightsReviewJob( + t, db, repo.ID, "bbb222", "feature", + storage.JobTypeReview, failingInsightsOutput("Feature finding"), + ) + + since := time.Now().Add(-24 * time.Hour) + req := testutil.MakeJSONRequest(t, http.MethodPost, "/api/enqueue", + EnqueueRequest{ + RepoPath: repoDir, + GitRef: "insights", + Agent: "test", + JobType: storage.JobTypeInsights, + Since: since.Format(time.RFC3339), + // Branch intentionally omitted. 
+ }) + w := httptest.NewRecorder() + + server.handleEnqueue(w, req) + + require.Equal(t, http.StatusCreated, w.Code, w.Body.String()) + + var job storage.ReviewJob + testutil.DecodeJSON(t, w, &job) + + stored, err := db.GetJobByID(job.ID) + require.NoError(t, err) + + assert.Contains(t, stored.Prompt, "Main finding") + assert.Contains(t, stored.Prompt, "Feature finding") +} + +func enqueueCompletedInsightsReviewJob( + t *testing.T, + db *storage.DB, + repoID int64, + gitRef, branch, jobType, output string, +) *storage.ReviewJob { + t.Helper() + + opts := storage.EnqueueOpts{ + RepoID: repoID, + GitRef: gitRef, + Branch: branch, + Agent: "test", + JobType: jobType, + } + + if jobType == storage.JobTypeCompact { + opts.Prompt = "compact prompt" + opts.Label = "compact" + } else { + commit, err := db.GetOrCreateCommit(repoID, gitRef, "Author", "Subject", time.Now()) + require.NoError(t, err) + opts.CommitID = commit.ID + } + + job, err := db.EnqueueJob(opts) + require.NoError(t, err) + + claimed, err := db.ClaimJob("worker") + require.NoError(t, err) + require.Equal(t, job.ID, claimed.ID) + + err = db.CompleteJob(job.ID, "test", "prompt", output) + require.NoError(t, err) + + return job +} + +func failingInsightsOutput(summary string) string { + return "- Medium - " + summary +} diff --git a/internal/daemon/server.go b/internal/daemon/server.go index a8ef74e7f..8de42a353 100644 --- a/internal/daemon/server.go +++ b/internal/daemon/server.go @@ -597,6 +597,7 @@ type EnqueueRequest struct { CommitSHA string `json:"commit_sha,omitempty"` // Single commit (for backwards compat) GitRef string `json:"git_ref,omitempty"` // Single commit, range like "abc..def", or "dirty" Branch string `json:"branch,omitempty"` // Branch name at time of job creation + Since string `json:"since,omitempty"` // RFC3339 lower bound for insights datasets Agent string `json:"agent,omitempty"` Model string `json:"model,omitempty"` // Model to use (for opencode: provider/model format) DiffContent 
string `json:"diff_content,omitempty"` // Pre-captured diff for dirty reviews @@ -605,7 +606,7 @@ type EnqueueRequest struct { CustomPrompt string `json:"custom_prompt,omitempty"` // Custom prompt for ad-hoc agent work Agentic bool `json:"agentic,omitempty"` // Enable agentic mode (allow file edits) OutputPrefix string `json:"output_prefix,omitempty"` // Prefix to prepend to review output - JobType string `json:"job_type,omitempty"` // Explicit job type (review/range/dirty/task/compact) + JobType string `json:"job_type,omitempty"` // Explicit job type (review/range/dirty/task/insights/compact/fix) Provider string `json:"provider,omitempty"` // Provider for pi agent (e.g., "anthropic") } @@ -717,17 +718,29 @@ func (s *Server) handleEnqueue(w http.ResponseWriter, r *http.Request) { // Check if branch is excluded from reviews currentBranch := git.GetCurrentBranch(checkoutRoot) - if currentBranch != "" && config.IsBranchExcluded(checkoutRoot, currentBranch) { + branchToCheck := currentBranch + if req.JobType == storage.JobTypeInsights { + // Insights is read-only historical analysis — only apply branch + // exclusion when an explicit branch filter was provided. + if req.Branch != "" { + branchToCheck = req.Branch + } else { + branchToCheck = "" + } + } + if branchToCheck != "" && config.IsBranchExcluded(checkoutRoot, branchToCheck) { // Silently skip excluded branches - return 200 OK with skipped flag writeJSON(w, map[string]any{ "skipped": true, - "reason": fmt.Sprintf("branch %q is excluded from reviews", currentBranch), + "reason": fmt.Sprintf("branch %q is excluded from reviews", branchToCheck), }) return } - // Fall back to detected branch when client didn't send one - if req.Branch == "" { + // Fall back to detected branch when client didn't send one. + // Insights jobs are read-only historical analysis, so leave + // the branch empty to analyze all branches. 
+ if req.Branch == "" && req.JobType != storage.JobTypeInsights { req.Branch = currentBranch } @@ -806,6 +819,32 @@ func (s *Server) handleEnqueue(w http.ResponseWriter, r *http.Request) { ) } + if req.JobType == storage.JobTypeInsights { + if req.Since == "" { + writeError(w, http.StatusBadRequest, "since is required for insights jobs") + return + } + since, err := time.Parse(time.RFC3339, req.Since) + if err != nil { + writeError(w, http.StatusBadRequest, "since must be RFC3339") + return + } + + insightsPrompt, reviewCount, err := s.buildInsightsPrompt(repoRoot, req.Branch, since) + if err != nil { + s.writeInternalError(w, fmt.Sprintf("build insights prompt: %v", err)) + return + } + if reviewCount == 0 { + writeJSON(w, map[string]any{ + "skipped": true, + "reason": "No failing reviews found in the specified time window.", + }) + return + } + req.CustomPrompt = insightsPrompt + } + // Check if this is a custom prompt, dirty review, range, or single commit // Note: isPrompt is determined by whether custom_prompt is provided, not git_ref value // This allows reviewing a branch literally named "prompt" without collision diff --git a/internal/daemon/worker_test.go b/internal/daemon/worker_test.go index 35f2a1a8f..94ffb95de 100644 --- a/internal/daemon/worker_test.go +++ b/internal/daemon/worker_test.go @@ -320,8 +320,8 @@ func TestWorkerPoolCancelJobConcurrentRegister(t *testing.T) { tc := newWorkerTestContext(t, 1) job := tc.createAndClaimJob(t, "concurrent-register", testWorkerID) - var canceled int32 - cancelFunc := func() { atomic.AddInt32(&canceled, 1) } + var canceled atomic.Int32 + cancelFunc := func() { canceled.Add(1) } tc.Pool.testHookAfterSecondCheck = func() { tc.Pool.registerRunningJob(job.ID, cancelFunc) @@ -334,7 +334,7 @@ func TestWorkerPoolCancelJobConcurrentRegister(t *testing.T) { return false }, "CancelJob should return true") } - if atomic.LoadInt32(&canceled) != 1 { + if canceled.Load() != 1 { assert.Condition(t, func() bool { return false }, 
"Job should have been canceled exactly once") diff --git a/internal/prompt/insights.go b/internal/prompt/insights.go new file mode 100644 index 000000000..d36a08e0c --- /dev/null +++ b/internal/prompt/insights.go @@ -0,0 +1,228 @@ +package prompt + +import ( + "fmt" + "strings" + "time" + + "github.com/roborev-dev/roborev/internal/storage" +) + +// InsightsSystemPrompt is the instruction for analyzing review patterns +const InsightsSystemPrompt = `You are a code review insights analyst. Your task is to analyze a collection of failing code reviews and identify actionable patterns that can improve future reviews. + +You will be given: +1. A set of failing review outputs with metadata (agent, date, git ref, addressed status) +2. The current project review guidelines (if any) + +Your analysis should produce: + +## 1. Recurring Finding Patterns + +Identify clusters of similar findings that appear across multiple reviews. For each pattern: +- Name the pattern concisely +- Count how many reviews contain it +- Give 1-2 representative examples with file/line references if available +- Assess whether this pattern represents a real code quality issue or review noise + +## 2. Hotspot Areas + +Identify files, packages, or directories that concentrate failures. List them with: +- Path or package name +- Number of failing reviews touching this area +- Common finding types in this area + +## 3. Noise Candidates + +Identify finding types that are consistently present in reviews that were closed (marked "addressed/closed") and accompanied by developer comments suggesting the finding was intentional or a false positive. These suggest the review guideline should suppress them. For each: +- Describe the finding type +- Note how many times it appeared and was closed with dismissive comments +- Suggest whether to suppress it entirely or refine the criteria + +## 4. Guideline Gaps + +Identify patterns the reviews keep flagging that are NOT mentioned in the current guidelines. 
For each:
- Describe what the reviews are catching
- Explain why it should (or shouldn't) be codified as a guideline
- Suggest specific guideline text if appropriate

## 5. Suggested Guideline Additions

Provide concrete text snippets that could be added to the project's review_guidelines configuration. Format each as a ready-to-use guideline entry.

Be specific and actionable. Avoid vague recommendations. Base everything on evidence from the provided reviews.`

// InsightsData holds the data needed to build an insights prompt.
type InsightsData struct {
	Reviews    []InsightsReview // Reviews to analyze; rendered under "## Failing Reviews"
	Guidelines string           // Current review guidelines text; "" renders a placeholder line
	RepoName   string           // NOTE(review): not referenced by BuildInsightsPrompt below — confirm intended use
	Since      time.Time        // Start of the analysis window; rendered as YYYY-MM-DD
	MaxReviews         int // Cap for number of reviews to include (0 = default of 50)
	MaxOutputPerReview int // Cap for individual review output size in bytes (0 = default of 8192)
	MaxPromptSize      int // Overall prompt size budget in bytes (0 = package-level MaxPromptSize default)
}

// InsightsReview is a simplified review record for the insights prompt.
type InsightsReview struct {
	JobID      int64
	Agent      string
	GitRef     string
	Branch     string             // Omitted from the rendered entry when empty
	FinishedAt *time.Time         // Omitted from the rendered entry when nil
	Output     string             // Review output; truncated to MaxOutputPerReview bytes
	Responses  []storage.Response // Comment thread; capped at 2048 bytes per review
	Closed     bool               // true = addressed/closed; open (false) reviews are prioritized
	Verdict    string // "P" or "F"
}

// BuildInsightsPrompt constructs the full prompt for insights analysis.
// It prioritizes unaddressed (open) findings over addressed (closed) ones
// when truncating to fit within size limits.
func BuildInsightsPrompt(data InsightsData) string {
	var sb strings.Builder

	// Resolve the overall size budget; zero/negative means "use the
	// package default".
	promptBudget := data.MaxPromptSize
	if promptBudget <= 0 {
		promptBudget = MaxPromptSize
	}

	sb.WriteString(InsightsSystemPrompt)
	sb.WriteString("\n\n")

	// Current guidelines section — capped at the smaller of 10% of the
	// prompt budget or 1024 bytes, so oversized guidelines don't crowd
	// out review data.
	sb.WriteString("## Current Review Guidelines\n\n")
	if data.Guidelines != "" {
		guidelines := data.Guidelines
		guidelineCap := min(promptBudget/10, 1024)
		if len(guidelines) > guidelineCap {
			// NOTE(review): byte-index truncation can split a multi-byte
			// UTF-8 rune at the cut point — confirm that is acceptable here.
			guidelines = guidelines[:guidelineCap] + "\n... (guidelines truncated)"
		}
		sb.WriteString(guidelines)
	} else {
		sb.WriteString("(No review guidelines configured for this project)")
	}
	sb.WriteString("\n\n")

	// Separate into open (unaddressed) and closed (addressed) reviews
	var open, closed []InsightsReview
	for _, r := range data.Reviews {
		if r.Closed {
			closed = append(closed, r)
		} else {
			open = append(open, r)
		}
	}

	// Apply defaults for the selection and per-review output caps.
	maxReviews := data.MaxReviews
	if maxReviews <= 0 {
		maxReviews = 50
	}
	maxOutput := data.MaxOutputPerReview
	if maxOutput <= 0 {
		maxOutput = 8192
	}
	// Prioritize open reviews, fill remaining slots with closed
	var selected []InsightsReview
	for _, r := range open {
		if len(selected) >= maxReviews {
			break
		}
		selected = append(selected, r)
	}
	for _, r := range closed {
		if len(selected) >= maxReviews {
			break
		}
		selected = append(selected, r)
	}

	// Reviews section
	sb.WriteString("## Failing Reviews\n\n")
	fmt.Fprintf(&sb, "Showing %d failing review(s) since %s.\n\n",
		len(selected), data.Since.Format("2006-01-02"))

	for i, r := range selected {
		// Build the review entry in a temporary buffer so we can
		// check size before committing it to the prompt.
		var entry strings.Builder
		fmt.Fprintf(&entry, "### Review %d (Job #%d)\n\n", i+1, r.JobID)
		fmt.Fprintf(&entry, "- **Agent:** %s\n", r.Agent)
		fmt.Fprintf(&entry, "- **Git Ref:** %s\n", r.GitRef)
		if r.Branch != "" {
			fmt.Fprintf(&entry, "- **Branch:** %s\n", r.Branch)
		}
		if r.FinishedAt != nil {
			fmt.Fprintf(&entry, "- **Date:** %s\n", r.FinishedAt.Format("2006-01-02 15:04"))
		}
		status := "unaddressed"
		if r.Closed {
			status = "addressed/closed"
		}
		fmt.Fprintf(&entry, "- **Status:** %s\n", status)
		entry.WriteString("\n")

		// Cap each review's output so one verbose review can't consume
		// the whole budget. (Byte-index slice; see UTF-8 note above.)
		output := r.Output
		if len(output) > maxOutput {
			output = output[:maxOutput] + "\n... (truncated)"
		}
		entry.WriteString(output)
		if !strings.HasSuffix(output, "\n") {
			entry.WriteString("\n")
		}
		// Append the comment thread, bounded by a fixed 2048-byte cap
		// per review.
		if len(r.Responses) > 0 {
			entry.WriteString("\nComments on this review:\n")
			commentCap := 2048
			commentBytes := 0
			for _, resp := range r.Responses {
				line := fmt.Sprintf("- %s: %q\n", resp.Responder, resp.Response)
				if commentBytes+len(line) > commentCap {
					entry.WriteString("... (remaining comments truncated)\n")
					break
				}
				entry.WriteString(line)
				commentBytes += len(line)
			}
		}
		entry.WriteString("\n")

		// Pre-check: would adding this entry exceed the budget?
		// The 256-byte reserve leaves room for the trailing notes below.
		if sb.Len()+entry.Len() > promptBudget-256 {
			remaining := len(selected) - i
			fmt.Fprintf(&sb, "\n(Remaining %d reviews omitted due to prompt size limits)\n", remaining)
			break
		}

		sb.WriteString(entry.String())
	}

	// Note reviews dropped by the maxReviews selection cap. This is
	// distinct from the budget-based omission above; both notes can
	// appear in the same prompt.
	if len(data.Reviews) > len(selected) {
		fmt.Fprintf(&sb, "\nNote: %d additional failing reviews were omitted (showing most recent %d, prioritizing unaddressed findings).\n",
			len(data.Reviews)-len(selected), len(selected))
	}

	return sb.String()
}

// InsightsReviewFromJob converts a ReviewJob (with verdict) to an InsightsReview.
// The review output must be fetched separately.
func InsightsReviewFromJob(
	job storage.ReviewJob, output string, responses []storage.Response, closed bool,
) InsightsReview {
	// Dereference the optional verdict pointer; "" means none recorded.
	verdict := ""
	if job.Verdict != nil {
		verdict = *job.Verdict
	}
	return InsightsReview{
		JobID:      job.ID,
		Agent:      job.Agent,
		GitRef:     job.GitRef,
		Branch:     job.Branch,
		FinishedAt: job.FinishedAt,
		Output:     output,
		Responses:  responses,
		Closed:     closed,
		Verdict:    verdict,
	}
}
diff --git a/internal/prompt/insights_test.go b/internal/prompt/insights_test.go
new file mode 100644
index 000000000..e2ed5d73e
--- /dev/null
+++ b/internal/prompt/insights_test.go
@@ -0,0 +1,224 @@
package prompt

import (
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/roborev-dev/roborev/internal/storage"
	"github.com/stretchr/testify/assert"
)

// Verifies the happy path: system prompt, guidelines, both review
// outputs, both status labels, and the agent name all appear.
func TestBuildInsightsPrompt_Basic(t *testing.T) {
	t.Parallel()

	now := time.Now()
	finished := now.Add(-1 * time.Hour)
	data := InsightsData{
		Reviews: []InsightsReview{
			{
				JobID:      1,
				Agent:      "codex",
				GitRef:     "abc1234",
				Branch:     "main",
				FinishedAt: &finished,
				Output:     "Found SQL injection in handler.go:42",
				Closed:     false,
				Verdict:    "F",
			},
			{
				JobID:      2,
				Agent:      "gemini",
				GitRef:     "def5678",
				FinishedAt: &finished,
				Output:     "Missing error handling in parser.go:10",
				Closed:     true,
				Verdict:    "F",
			},
		},
		Guidelines: "Always check error returns",
		RepoName:   "myrepo",
		Since:      now.Add(-30 * 24 * time.Hour),
	}

	result := BuildInsightsPrompt(data)

	assert.Contains(t, result, "code review insights analyst")
	assert.Contains(t, result, "Always check error returns")
	assert.Contains(t, result, "SQL injection")
	assert.Contains(t, result, "Missing error handling")
	assert.Contains(t, result, "unaddressed")
	assert.Contains(t, result, "addressed/closed")
	assert.Contains(t, result, "codex")
}

// Verifies the comment thread renders with the `- responder: "text"`
// quoting produced by %q.
func TestBuildInsightsPrompt_IncludesComments(t *testing.T) {
	t.Parallel()

	data := InsightsData{
		Reviews: []InsightsReview{
			{
				JobID:  1,
				Agent:  "test",
				Output: "finding",
				Responses: []storage.Response{
					{Responder: "user", Response: "This was intentional"},
					{Responder: "agent", Response: "Confirmed after discussion"},
				},
			},
		},
		Since: time.Now().Add(-7 * 24 * time.Hour),
	}

	result := BuildInsightsPrompt(data)

	assert.Contains(t, result, "Comments on this review:")
	assert.Contains(t, result, `- user: "This was intentional"`)
	assert.Contains(t, result, `- agent: "Confirmed after discussion"`)
}

// Verifies the per-review 2048-byte comment cap: the thread is cut
// with a truncation marker but the review itself is still included.
func TestBuildInsightsPrompt_TruncatesLargeCommentThread(t *testing.T) {
	t.Parallel()

	var responses []storage.Response
	for i := range 50 {
		responses = append(responses, storage.Response{
			Responder: "user",
			Response:  strings.Repeat("comment text ", 20) + fmt.Sprintf("comment-%d", i),
		})
	}

	data := InsightsData{
		Reviews: []InsightsReview{
			{
				JobID:     1,
				Agent:     "test",
				Output:    "a finding",
				Responses: responses,
			},
		},
		Since:         time.Now().Add(-7 * 24 * time.Hour),
		MaxPromptSize: 50000,
	}

	result := BuildInsightsPrompt(data)

	assert.Contains(t, result, "a finding")
	assert.Contains(t, result, "Comments on this review:")
	assert.Contains(t, result, "remaining comments truncated")
	// The review should still be included despite large comments
	assert.Contains(t, result, "Review 1")
}

// Verifies the placeholder line appears when Guidelines is empty.
func TestBuildInsightsPrompt_NoGuidelines(t *testing.T) {
	t.Parallel()

	data := InsightsData{
		Reviews: []InsightsReview{
			{JobID: 1, Agent: "test", Output: "finding"},
		},
		Since: time.Now().Add(-7 * 24 * time.Hour),
	}

	result := BuildInsightsPrompt(data)

	assert.Contains(t, result, "No review guidelines configured")
}

// Verifies open reviews fill the MaxReviews slots before closed ones,
// and that the omission note is emitted for the dropped reviews.
func TestBuildInsightsPrompt_PrioritizesOpen(t *testing.T) {
	t.Parallel()

	data := InsightsData{
		MaxReviews: 2,
		Reviews: []InsightsReview{
			{JobID: 1, Agent: "test", Output: "open finding 1", Closed: false},
			{JobID: 2, Agent: "test", Output: "open finding 2", Closed: false},
			{JobID: 3, Agent: "test", Output: "open finding 3", Closed: false},
			{JobID: 4, Agent: "test", Output: "closed finding", Closed: true},
		},
		Since: time.Now().Add(-7 * 24 * time.Hour),
	}

	result := BuildInsightsPrompt(data)

	assert.Contains(t, result, "open finding 1")
	assert.Contains(t, result, "open finding 2")
	assert.NotContains(t, result, "closed finding")
	assert.Contains(t, result, "additional failing reviews were omitted")
}

// Verifies MaxOutputPerReview cuts a single review's output.
func TestBuildInsightsPrompt_TruncatesLongOutput(t *testing.T) {
	t.Parallel()

	longOutput := strings.Repeat("x", 10000)
	data := InsightsData{
		Reviews: []InsightsReview{
			{JobID: 1, Agent: "test", Output: longOutput},
		},
		MaxOutputPerReview: 100,
		Since:              time.Now().Add(-7 * 24 * time.Hour),
	}

	result := BuildInsightsPrompt(data)

	assert.Contains(t, result, "... (truncated)")
	assert.NotContains(t, result, longOutput)
}

// Verifies the zero-review case still renders a well-formed header.
func TestBuildInsightsPrompt_Empty(t *testing.T) {
	t.Parallel()

	data := InsightsData{
		Since: time.Now().Add(-7 * 24 * time.Hour),
	}

	result := BuildInsightsPrompt(data)

	assert.Contains(t, result, "0 failing review(s)")
}

// Verifies the overall MaxPromptSize budget is a hard upper bound and
// that the budget-based omission note is emitted.
func TestBuildInsightsPrompt_RespectsPromptBudget(t *testing.T) {
	t.Parallel()

	var reviews []InsightsReview
	for i := range 20 {
		reviews = append(reviews, InsightsReview{
			JobID:  int64(i + 1),
			Agent:  "test",
			Output: strings.Repeat("finding text ", 100),
		})
	}

	budget := 5000
	data := InsightsData{
		Reviews:       reviews,
		Since:         time.Now().Add(-7 * 24 * time.Hour),
		MaxPromptSize: budget,
	}

	result := BuildInsightsPrompt(data)

	assert.LessOrEqual(t, len(result), budget)
	assert.Contains(t, result, "omitted due to prompt size limits")
}

// Verifies oversized guidelines are truncated (min of 10% of budget or
// 1024 bytes) without crowding out the review data.
func TestBuildInsightsPrompt_TruncatesLargeGuidelines(t *testing.T) {
	t.Parallel()

	hugeGuidelines := strings.Repeat("guideline rule ", 10000)
	data := InsightsData{
		Reviews: []InsightsReview{
			{JobID: 1, Agent: "test", Output: "a finding"},
		},
		Guidelines:    hugeGuidelines,
		Since:         time.Now().Add(-7 * 24 * time.Hour),
		MaxPromptSize: 10000,
	}

	result := BuildInsightsPrompt(data)

	assert.LessOrEqual(t, len(result), data.MaxPromptSize)
	assert.Contains(t, result, "guidelines truncated")
	assert.Contains(t, result, "a finding")
}
diff --git a/internal/prompt/prompt.go b/internal/prompt/prompt.go
index 853f3394f..69671b000 100644
--- a/internal/prompt/prompt.go
+++ b/internal/prompt/prompt.go
@@ -461,7 +461,7 @@ func (b *Builder) buildSinglePrompt(repoPath, sha string, repoID int64, contextC
	var optionalContext strings.Builder

	// Add project-specific guidelines from default branch
-	b.writeProjectGuidelines(&optionalContext, loadGuidelines(repoPath))
+	b.writeProjectGuidelines(&optionalContext, LoadGuidelines(repoPath))

	// Get previous reviews if requested
	if contextCount > 0 && b.db != nil {
@@ -578,7 +578,7 @@ func (b *Builder) buildRangePrompt(repoPath, rangeRef string, repoID int64, cont
	var optionalContext strings.Builder

	// Add project-specific guidelines from default branch
-	b.writeProjectGuidelines(&optionalContext, loadGuidelines(repoPath))
+	b.writeProjectGuidelines(&optionalContext, LoadGuidelines(repoPath))

	// Get previous reviews from before the range start
	if contextCount > 0 && b.db != nil {
@@ -722,12 +722,10 @@ func (b *Builder) writeProjectGuidelines(sb *strings.Builder, guidelines string)
	sb.WriteString("\n\n")
}

-// loadMergedGuidelines loads review guidelines from the repo's default
-// branch (detected via git) and the given ref, then merges them so
-// branch guidelines can add lines but cannot remove base lines.
-// Falls back to filesystem LoadRepoConfig only when no .roborev.toml
-// exists on the default branch (not when it exists with empty guidelines).
-func loadGuidelines(repoPath string) string {
+// LoadGuidelines loads review guidelines from the repo's default
+// branch, falling back to filesystem config when the default branch
+// has no .roborev.toml.
+func LoadGuidelines(repoPath string) string { // Load review guidelines from the default branch (origin/main, // origin/master, etc.). Branch-specific guidelines are intentionally // ignored to prevent prompt injection from untrusted PR authors. diff --git a/internal/prompt/prompt_test.go b/internal/prompt/prompt_test.go index b3317d0b8..9ee47b7a3 100644 --- a/internal/prompt/prompt_test.go +++ b/internal/prompt/prompt_test.go @@ -461,7 +461,7 @@ func singleCommitPromptPrefixLen(t *testing.T, repoPath, sha string) int { b := NewBuilder(nil) sb.WriteString(GetSystemPrompt("codex", "review")) sb.WriteString("\n") - b.writeProjectGuidelines(&sb, loadGuidelines(repoPath)) + b.writeProjectGuidelines(&sb, LoadGuidelines(repoPath)) info, err := gitpkg.GetCommitInfo(repoPath, sha) require.NoError(t, err, "GetCommitInfo failed: %v", err) @@ -484,7 +484,7 @@ func rangePromptPrefixLen(t *testing.T, repoPath, rangeRef string) int { b := NewBuilder(nil) sb.WriteString(GetSystemPrompt("codex", "range")) sb.WriteString("\n") - b.writeProjectGuidelines(&sb, loadGuidelines(repoPath)) + b.writeProjectGuidelines(&sb, LoadGuidelines(repoPath)) commits, err := gitpkg.GetRangeCommits(repoPath, rangeRef) require.NoError(t, err, "GetRangeCommits failed: %v", err) @@ -1011,7 +1011,7 @@ func TestLoadGuidelines(t *testing.T) { tt.setupFilesystem(t, ctx.Dir) } - guidelines := loadGuidelines(ctx.Dir) + guidelines := LoadGuidelines(ctx.Dir) if tt.wantContains != "" { assertContains(t, guidelines, tt.wantContains, "missing expected guidelines") } diff --git a/internal/storage/models.go b/internal/storage/models.go index 07fc9fadf..91caa51fa 100644 --- a/internal/storage/models.go +++ b/internal/storage/models.go @@ -37,12 +37,13 @@ const ( // JobType classifies what kind of work a review job represents. 
const (
-	JobTypeReview  = "review"  // Single commit review
-	JobTypeRange   = "range"   // Commit range review
-	JobTypeDirty   = "dirty"   // Uncommitted changes review
-	JobTypeTask    = "task"    // Run/analyze/design/custom prompt
-	JobTypeCompact = "compact" // Consolidated review verification
-	JobTypeFix     = "fix"     // Background fix using worktree
+	JobTypeReview   = "review"   // Single commit review
+	JobTypeRange    = "range"    // Commit range review
+	JobTypeDirty    = "dirty"    // Uncommitted changes review
+	JobTypeTask     = "task"     // Run/analyze/design/custom prompt
+	JobTypeInsights = "insights" // Historical review insights analysis
+	JobTypeCompact  = "compact"  // Consolidated review verification
+	JobTypeFix      = "fix"      // Background fix using worktree
)

type ReviewJob struct {
@@ -56,7 +57,7 @@ type ReviewJob struct {
	Model     string `json:"model,omitempty"`     // Model to use (for opencode: provider/model format)
	Provider  string `json:"provider,omitempty"`  // Provider to use (e.g., anthropic, openai)
	Reasoning string `json:"reasoning,omitempty"` // thorough, standard, fast (default: thorough)
-	JobType string `json:"job_type"` // review, range, dirty, task
+	JobType string `json:"job_type"` // review, range, dirty, task, insights, compact, fix
	Status     JobStatus  `json:"status"`
	EnqueuedAt time.Time  `json:"enqueued_at"`
	StartedAt  *time.Time `json:"started_at,omitempty"`
@@ -102,7 +103,7 @@ func (j ReviewJob) IsDirtyJob() bool {
// Compact jobs are not considered task jobs since they produce P/F verdicts.
func (j ReviewJob) IsTaskJob() bool {
	if j.JobType != "" {
-		return j.JobType == JobTypeTask
+		return j.JobType == JobTypeTask || j.JobType == JobTypeInsights
	}
	// Fallback heuristic for jobs without job_type (e.g., from old sync data)
	if j.CommitID != nil {
@@ -124,10 +125,38 @@ func (j ReviewJob) IsTaskJob() bool {
}

// UsesStoredPrompt returns true if this job type uses a pre-stored prompt
-// (task, compact, or fix). These job types have prompts built at enqueue
+// (task, insights, compact, or fix). These job types have prompts built at enqueue
// time, not constructed by the worker from git data.
func (j ReviewJob) UsesStoredPrompt() bool {
-	return j.JobType == JobTypeTask || j.JobType == JobTypeCompact || j.JobType == JobTypeFix
+	return j.JobType == JobTypeTask ||
+		j.JobType == JobTypeInsights ||
+		j.JobType == JobTypeCompact ||
+		j.JobType == JobTypeFix
+}
+
+// IsReviewJob returns true if this is an actual code review
+// (single commit, range, or dirty) rather than a task, insights,
+// compact, or fix job. The legacy fallback uses positive review
+// signals (CommitID, dirty ref, range ref) to avoid misclassifying
+// old stored-prompt jobs that happen to have a GitRef.
+func (j ReviewJob) IsReviewJob() bool {
+	if j.JobType != "" {
+		return j.JobType == JobTypeReview ||
+			j.JobType == JobTypeRange ||
+			j.JobType == JobTypeDirty
+	}
+	// Positive heuristic for legacy jobs without job_type:
+	// a review has a commit, is dirty, or is a range.
+	if j.CommitID != nil {
+		return true
+	}
+	if j.GitRef == "dirty" || j.DiffContent != nil {
+		return true
+	}
+	// NOTE(review): Contains("..") also matches three-dot refs like
+	// "a...b" (desired), but would misfire on a plain ref that happens
+	// to contain ".." — confirm such refs cannot reach legacy rows.
+	if strings.Contains(j.GitRef, "..") {
+		return true
+	}
+	return false
}

// IsFixJob returns true if this is a background fix job.
diff --git a/internal/storage/models_test.go b/internal/storage/models_test.go
index c77c71b16..ebe1f1d4e 100644
--- a/internal/storage/models_test.go
+++ b/internal/storage/models_test.go
@@ -47,6 +47,11 @@ func TestIsTaskJob(t *testing.T) {
		job:  storage.ReviewJob{JobType: storage.JobTypeTask, GitRef: "analyze"},
		want: true,
	},
+	// Insights jobs are classified as task jobs (see IsTaskJob).
+	{
+		name: "explicit: insights job by job_type",
+		job:  storage.ReviewJob{JobType: storage.JobTypeInsights, GitRef: "insights"},
+		want: true,
+	},
	// Inferred JobType
	{
		name: "inferred: single commit review",
@@ -152,6 +157,35 @@ func TestIsDirtyJob(t *testing.T) {
	}
}

+// TestIsReviewJob covers both the explicit job_type classification and
+// the positive-signal legacy fallback (CommitID, "dirty" ref, ".." range).
+func TestIsReviewJob(t *testing.T) {
+	assert := assert.New(t)
+
+	tests := []struct {
+		name string
+		job  storage.ReviewJob
+		want bool
+	}{
+		// Explicit JobType
+		{name: "explicit: review", job: storage.ReviewJob{JobType: storage.JobTypeReview, GitRef: "abc123"}, want: true},
+		{name: "explicit: range", job: storage.ReviewJob{JobType: storage.JobTypeRange, GitRef: "a..b"}, want: true},
+		{name: "explicit: dirty", job: storage.ReviewJob{JobType: storage.JobTypeDirty, GitRef: "dirty"}, want: true},
+		{name: "explicit: task", job: storage.ReviewJob{JobType: storage.JobTypeTask, GitRef: "run"}, want: false},
+		{name: "explicit: insights", job: storage.ReviewJob{JobType: storage.JobTypeInsights, GitRef: "insights"}, want: false},
+		{name: "explicit: compact", job: storage.ReviewJob{JobType: storage.JobTypeCompact, GitRef: "compact"}, want: false},
+		{name: "explicit: fix", job: storage.ReviewJob{JobType: storage.JobTypeFix, GitRef: "abc123"}, want: false},
+		// Inferred (empty job_type from old data)
+		{name: "inferred: commit review", job: storage.ReviewJob{CommitID: testutil.Ptr(int64(1)), GitRef: "abc123"}, want: true},
+		{name: "inferred: dirty", job: storage.ReviewJob{GitRef: "dirty"}, want: true},
+		{name: "inferred: range", job: storage.ReviewJob{GitRef: "abc..def"}, want: true},
+		{name: "inferred: task label", job: storage.ReviewJob{GitRef: "run:lint"}, want: false},
+		{name: "inferred: empty ref", job: storage.ReviewJob{GitRef: ""}, want: false},
+	}
+
+	for _, tt := range tests {
+		// NOTE(review): IsReviewJob() is evaluated twice per case (once
+		// for the check, once for the failure message) — harmless but
+		// could cache the result in a local.
+		assert.Equal(tt.want, tt.job.IsReviewJob(), "%q: IsReviewJob() = %v, want %v", tt.name, tt.job.IsReviewJob(), tt.want)
+	}
+}
+
 func TestUsesStoredPrompt(t *testing.T) {
	assert := assert.New(t)

@@ -161,6 +195,7 @@ func TestUsesStoredPrompt(t *testing.T) {
		want bool
	}{
		{name: "task", job: storage.ReviewJob{JobType: storage.JobTypeTask}, want: true},
+		{name: "insights", job: storage.ReviewJob{JobType: storage.JobTypeInsights}, want: true},
		{name: "compact", job: storage.ReviewJob{JobType: storage.JobTypeCompact}, want: true},
		{name: "review", job: storage.ReviewJob{JobType: storage.JobTypeReview}, want: false},
		{name: "range", job: storage.ReviewJob{JobType: storage.JobTypeRange}, want: false},