From 4e1f7073c6ea16a281c684fb34e88520a6715fa1 Mon Sep 17 00:00:00 2001 From: Britney Wang Date: Tue, 9 Jun 2026 16:17:25 +0800 Subject: [PATCH 01/15] feat: update workflow to create issue on default --- internal/workflow/workflow.go | 413 ++++++++++++++++++---------------- 1 file changed, 214 insertions(+), 199 deletions(-) diff --git a/internal/workflow/workflow.go b/internal/workflow/workflow.go index cd82e4b..6441f82 100644 --- a/internal/workflow/workflow.go +++ b/internal/workflow/workflow.go @@ -12,6 +12,7 @@ import ( "bauer/internal/config" "bauer/internal/github" "bauer/internal/orchestrator" + "bauer/internal/prompt" ) // WorkflowInput represents the input for a complete workflow execution @@ -28,8 +29,7 @@ type WorkflowInput struct { PageRefresh bool OutputDir string Model string - DryRun bool - ParseOnly bool // Phase 1: Parse document only, skip GitHub integration + ParseOnly bool // Parse document to JSON only. // Local repository path LocalRepoPath string @@ -60,7 +60,11 @@ type WorkflowOutput struct { FinalizationInfo struct { CommitMessage string BranchPushed bool - PullRequest struct { + Issue struct { + URL string + Title string + } + PullRequest struct { URL string Number int Title string @@ -79,12 +83,9 @@ type WorkflowOutput struct { Warnings []string `json:"warnings"` } -// ExecuteWorkflow orchestrates the complete flow: -// 1. GitHub Setup (clone, create branch) -// 2. Bauer Processing (extract, chunk, apply changes) -// 3. GitHub Finalization (commit, push, create PR) -// -// If ParseOnly is true, skips steps 1 and 3, outputs parsed data to JSON +// ExecuteWorkflow orchestrates a parse-first flow: +// 1. Parse Google Doc and write bauer-parse-result.json +// 2. 
If ParseOnly is false, push the JSON to a branch and create a Copilot issue func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator.Orchestrator) (*WorkflowOutput, error) { output := &WorkflowOutput{ Status: "pending", @@ -111,266 +112,280 @@ func ExecuteWorkflow(ctx context.Context, input WorkflowInput, orch orchestrator logger.Info("workflow: resolved credentials path", "path", credentialsPath) } - // PHASE 1: Parse-only mode - extract and output JSON without GitHub integration - if input.ParseOnly { - logger.Info("workflow: Phase 1 - Parse-only mode") - - // Create Bauer config for parsing - bauerCfg := &config.Config{ - DocID: input.DocID, - CredentialsPath: credentialsPath, - DryRun: true, // Always dry-run in parse-only mode - ChunkSize: input.ChunkSize, - PageRefresh: input.PageRefresh, - OutputDir: input.OutputDir, - Model: input.Model, - TargetRepo: ".", // Current directory (doesn't matter for parse-only) - ParseOnly: true, - } + logger.Info("workflow: parse phase") - bauerCfg.ApplyDefaults() - input.OutputDir = bauerCfg.OutputDir - input.Model = bauerCfg.Model + bauerCfg := &config.Config{ + DocID: input.DocID, + CredentialsPath: credentialsPath, + DryRun: true, + ChunkSize: input.ChunkSize, + PageRefresh: input.PageRefresh, + OutputDir: input.OutputDir, + Model: input.Model, + TargetRepo: ".", + ParseOnly: true, + } - logger.Info("workflow: Executing parse-only orchestration") + bauerCfg.ApplyDefaults() + input.OutputDir = bauerCfg.OutputDir + input.Model = bauerCfg.Model - // Execute parse-only orchestration - bauerResult, err := orch.Execute(ctx, bauerCfg) - if err != nil { - output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("Bauer parsing error: %v", err)) - output.EndTime = time.Now() - output.TotalDuration = output.EndTime.Sub(output.StartTime) - return output, err - } + bauerResult, err := orch.Execute(ctx, bauerCfg) + if err != nil { + output.Status = "failed" + output.Errors = 
append(output.Errors, fmt.Sprintf("Bauer parsing error: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } - // Store results from orchestrator - if bauerResult != nil { - output.BauerResult.ExtractionDuration = bauerResult.ExtractionDuration - output.BauerResult.PlanDuration = 0 // No prompt generation in parse-only mode - output.BauerResult.CopilotDuration = 0 // No Copilot execution in parse-only mode - if bauerResult.ParseResult != nil { - output.BauerResult.TotalSuggestions = len(bauerResult.ParseResult.ActionableSuggestions) - } + if bauerResult != nil { + output.BauerResult.ExtractionDuration = bauerResult.ExtractionDuration + output.BauerResult.PlanDuration = 0 + output.BauerResult.CopilotDuration = 0 + if bauerResult.ParseResult != nil { + output.BauerResult.TotalSuggestions = len(bauerResult.ParseResult.ActionableSuggestions) } + } - // Write the simplified ParseResult to JSON file - absOutputDir, err := filepath.Abs(input.OutputDir) + absOutputDir, err := filepath.Abs(input.OutputDir) + if err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("failed to resolve output directory path: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } + + if err := os.MkdirAll(absOutputDir, 0755); err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("failed to create output directory: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } + + outputPath := filepath.Join(absOutputDir, "bauer-parse-result.json") + if bauerResult != nil && bauerResult.ParseResult != nil { + parseResultJSON, err := json.MarshalIndent(bauerResult.ParseResult, "", " ") if err != nil { output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("failed to resolve output directory 
path: %v", err)) + output.Errors = append(output.Errors, fmt.Sprintf("failed to marshal parse result: %v", err)) output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) return output, err } - if err := os.MkdirAll(absOutputDir, 0755); err != nil { + if err := os.WriteFile(outputPath, parseResultJSON, 0644); err != nil { output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("failed to create output directory: %v", err)) + output.Errors = append(output.Errors, fmt.Sprintf("failed to write parse result file: %v", err)) output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) return output, err } + } - outputPath := filepath.Join(absOutputDir, "bauer-parse-result.json") - if bauerResult != nil && bauerResult.ParseResult != nil { - parseResultJSON, err := json.MarshalIndent(bauerResult.ParseResult, "", " ") - if err != nil { - output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("failed to marshal parse result: %v", err)) - output.EndTime = time.Now() - output.TotalDuration = output.EndTime.Sub(output.StartTime) - return output, err - } - - if err := os.WriteFile(outputPath, parseResultJSON, 0644); err != nil { - output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("failed to write parse result file: %v", err)) - output.EndTime = time.Now() - output.TotalDuration = output.EndTime.Sub(output.StartTime) - return output, err - } - } + if _, err := os.Stat(outputPath); err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("parse result file was not created at expected location: %s: %v", outputPath, err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } + + output.OutputFile = outputPath - // Verify the file actually exists - if _, err := os.Stat(outputPath); err != nil { + if input.ParseOnly { + output.EndTime = time.Now() + 
output.TotalDuration = output.EndTime.Sub(output.StartTime) + output.Status = "success" + logger.Info("workflow: parse-only mode complete", "output_file", output.OutputFile) + return output, nil + } + + repo, err := github.ParseGitHubRepo(input.GitHubRepo) + if err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("invalid github repo: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } + + token := input.GitHubToken + if token == "" { + token, err = github.GetGitHubToken() + if err != nil { output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("parse result file was not created at expected location: %s: %v", outputPath, err)) + output.Errors = append(output.Errors, fmt.Sprintf("failed to get github token: %v", err)) output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) return output, err } + } + if err := github.SetupGitHubAuth(token); err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("failed to setup github auth: %v", err)) output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) - output.Status = "success" - output.OutputFile = outputPath - - // Safely compute totalFiles with nil checks - totalFiles := 0 - if bauerResult != nil && bauerResult.ParseResult != nil { - totalFiles = len(bauerResult.ParseResult.FileMappings) - } - - logger.Info("workflow: Parse-only mode complete", - "output_file", output.OutputFile, - "extraction_duration", output.BauerResult.ExtractionDuration, - "total_suggestions", output.BauerResult.TotalSuggestions, - "total_files", totalFiles, - ) - - return output, nil + return output, err } - // PHASES 2-4: Full workflow with GitHub integration - logger.Info("workflow: Setting up GitHub") + parseFileContent, err := os.ReadFile(outputPath) + if err != nil { + output.Status = "failed" + output.Errors = 
append(output.Errors, fmt.Sprintf("failed to read parse result for issue: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } - githubSetupInput := github.GitHubSetupInput{ + setupInput := github.GitHubSetupInput{ GitHubRepo: input.GitHubRepo, - GitHubToken: input.GitHubToken, + GitHubToken: token, BranchPrefix: input.BranchPrefix, LocalRepoPath: input.LocalRepoPath, } - - githubSetupOutput, err := github.SetupGitHubPhase(githubSetupInput) + setupOutput, err := github.SetupGitHubPhase(setupInput) if err != nil { output.Status = "failed" - output.Errors = append(output.Errors, err.Error()) + output.Errors = append(output.Errors, fmt.Sprintf("failed to prepare branch-backed prompt file: %v", err)) output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) return output, err } - // Store GH setup results - output.RepositoryInfo.Owner = githubSetupOutput.Repo.Owner - output.RepositoryInfo.Repo = githubSetupOutput.Repo.Name - output.RepositoryInfo.LocalPath = githubSetupOutput.LocalPath - output.RepositoryInfo.BranchName = githubSetupOutput.BranchName - output.RepositoryInfo.DefaultBranch = githubSetupOutput.DefaultBranch - output.RepositoryInfo.CurrentBranch = githubSetupOutput.CurrentBranch - - logger.Info("workflow success: GitHub setup successful") - - // Change to target repository directory - // Save original directory to restore later - originalDir, err := os.Getwd() - if err != nil { + + output.RepositoryInfo.Owner = setupOutput.Repo.Owner + output.RepositoryInfo.Repo = setupOutput.Repo.Name + output.RepositoryInfo.LocalPath = setupOutput.LocalPath + output.RepositoryInfo.BranchName = setupOutput.BranchName + output.RepositoryInfo.DefaultBranch = setupOutput.DefaultBranch + output.RepositoryInfo.CurrentBranch = setupOutput.CurrentBranch + + repoPromptPath := filepath.ToSlash(filepath.Join("bauer-output", filepath.Base(outputPath))) + targetPromptPath := 
filepath.Join(setupOutput.LocalPath, filepath.FromSlash(repoPromptPath)) + if err := os.MkdirAll(filepath.Dir(targetPromptPath), 0755); err != nil { output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("failed to get current directory: %v", err)) + output.Errors = append(output.Errors, fmt.Sprintf("failed to create prompt file directory in repo: %v", err)) output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) return output, err } - - if err := os.Chdir(input.LocalRepoPath); err != nil { + if err := os.WriteFile(targetPromptPath, parseFileContent, 0644); err != nil { output.Status = "failed" - output.Errors = append(output.Errors, fmt.Sprintf("failed to change to cloned repository: %v", err)) + output.Errors = append(output.Errors, fmt.Sprintf("failed to write prompt file in repo branch: %v", err)) output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) return output, err } - logger.Info("workflow: changed to cloned repository", "path", input.LocalRepoPath) - defer os.Chdir(originalDir) - - // Bauer processing - logger.Info("workflow: starting phase 2 - Bauer processing") - // Create Bauer config with target repo (now current directory) - bauerCfg := &config.Config{ - DocID: input.DocID, - CredentialsPath: credentialsPath, // Use absolute path - DryRun: input.DryRun, - ChunkSize: input.ChunkSize, - PageRefresh: input.PageRefresh, - OutputDir: input.OutputDir, - Model: input.Model, - TargetRepo: ".", // Current directory is the cloned repo + commitMessage := fmt.Sprintf("Add Bauer parse output for doc %s", input.DocID) + if err := github.CommitFiles(setupOutput.LocalPath, commitMessage, []string{repoPromptPath}); err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("failed to commit branch prompt file: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } + if err := 
github.PushBranch(setupOutput.LocalPath, setupOutput.BranchName); err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("failed to push branch prompt file: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err } - logger.Info("workflow: Bauer target repository set at", "path", bauerCfg.TargetRepo) + output.FinalizationInfo.CommitMessage = commitMessage + output.FinalizationInfo.BranchPushed = true - // Execute Bauer orchestration - bauerResult, err := orch.Execute(ctx, bauerCfg) - if err != nil { - output.Status = "partial" - output.Errors = append(output.Errors, fmt.Sprintf("Bauer processing error: %v", err)) - logger.Warn("workflow: Bauer processing returned error", "error", err) - // Continue anyway - we can still commit what we have + pinnedRef := setupOutput.BranchName + if sha, shaErr := github.GetHeadCommitSHA(setupOutput.LocalPath); shaErr == nil && sha != "" { + pinnedRef = sha + } else if shaErr != nil { + output.Warnings = append(output.Warnings, fmt.Sprintf("could not resolve HEAD SHA for pinned prompt link; falling back to branch ref: %v", shaErr)) } - // Store Bauer results - if bauerResult != nil { - output.BauerResult.ExtractionDuration = bauerResult.ExtractionDuration - output.BauerResult.PlanDuration = bauerResult.PlanDuration - output.BauerResult.CopilotDuration = bauerResult.CopilotDuration - if len(bauerResult.Chunks) > 0 { - output.BauerResult.ChunkCount = len(bauerResult.Chunks) - } - if bauerResult.ExtractionResult != nil { - // Count total suggestions from extraction result - output.BauerResult.TotalSuggestions = 0 // TODO: adjust based on actual field - } + branchBlobURL := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s", setupOutput.Repo.Owner, setupOutput.Repo.Name, setupOutput.BranchName, repoPromptPath) + pinnedBlobURL := fmt.Sprintf("https://github.com/%s/%s/blob/%s/%s", setupOutput.Repo.Owner, setupOutput.Repo.Name, 
pinnedRef, repoPromptPath) + rawURL := fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", setupOutput.Repo.Owner, setupOutput.Repo.Name, pinnedRef, repoPromptPath) + + issueTitle := fmt.Sprintf("@copilot Apply Bauer parse result to %s", repo.Name) + issueBody := fmt.Sprintf("@copilot\n\nAutomated parse result from Bauer\n\nGDoc ID: %s", input.DocID) + if bauerResult != nil && bauerResult.ExtractionResult != nil { + issueBody = prompt.BuildIssueDescription( + bauerResult.ExtractionResult, + bauerResult.Chunks, + input.PageRefresh, + ) } + issueBody = issueBody + "\n\n## Prompt Source\n\n" + + "Use this branch-backed prompt file as the machine-readable input, together with this issue description.\n\n" + + fmt.Sprintf("- Branch file: %s\n", branchBlobURL) + + fmt.Sprintf("- Pinned file (commit SHA): %s\n", pinnedBlobURL) + + fmt.Sprintf("- Raw JSON: %s\n", rawURL) - logger.Info("Bauer results", - "extraction_duration", output.BauerResult.ExtractionDuration, - "plan_duration", output.BauerResult.PlanDuration, - "copilot_duration", output.BauerResult.CopilotDuration, - "chunk_count", output.BauerResult.ChunkCount, - "total_suggestions", output.BauerResult.TotalSuggestions, - ) - logger.Info("workflow success: Bauer processing finished") - - // GitHub finalization - logger.Info("workflow: GitHub finalization") - - commitMessage := fmt.Sprintf("Apply BAU suggestions from doc %s", input.DocID) - prTitle := fmt.Sprintf("Apply BAU suggestions to %s", githubSetupOutput.Repo.Name) - prBody := fmt.Sprintf("Automated copy update changes from Bauer\n\nGDoc ID: %s", input.DocID) - - finalizationInput := github.GitHubFinalizationInput{ - LocalRepoPath: input.LocalRepoPath, - BranchName: githubSetupOutput.BranchName, - DefaultBranch: githubSetupOutput.DefaultBranch, - Owner: githubSetupOutput.Repo.Owner, - Repo: githubSetupOutput.Repo.Name, - CommitMessage: commitMessage, - DryRun: input.DryRun, - PRTitle: prTitle, - PRBody: prBody, - Labels: []string{}, + issueURL, 
issueWarning, err := createIssueWithFallback(repo.Owner, repo.Name, issueTitle, issueBody) + if err != nil { + output.Status = "failed" + output.Errors = append(output.Errors, fmt.Sprintf("failed to create issue: %v", err)) + output.EndTime = time.Now() + output.TotalDuration = output.EndTime.Sub(output.StartTime) + return output, err + } + if issueWarning != "" { + output.Warnings = append(output.Warnings, issueWarning) } - finalizationOutput, _ := github.FinalizeGitHubPhase(finalizationInput) - - // Store GH PR results - output.FinalizationInfo.CommitMessage = finalizationOutput.CommitMessage - output.FinalizationInfo.BranchPushed = finalizationOutput.BranchPushed - output.FinalizationInfo.PullRequest.URL = finalizationOutput.PullRequest.URL - output.FinalizationInfo.PullRequest.Title = finalizationOutput.PullRequest.Title - - // Merge warnings and errors from finalization - output.Warnings = append(output.Warnings, finalizationOutput.Warnings...) - output.Errors = append(output.Errors, finalizationOutput.Errors...) 
- - logger.Info("workflow: phase 3 complete - GitHub finalization finished") + output.FinalizationInfo.Issue.URL = issueURL + output.FinalizationInfo.Issue.Title = issueTitle output.EndTime = time.Now() output.TotalDuration = output.EndTime.Sub(output.StartTime) - if len(output.Errors) == 0 { output.Status = "success" - } else if output.FinalizationInfo.BranchPushed { - output.Status = "partial" } else { - output.Status = "failed" + output.Status = "partial" } - logger.Info("workflow: complete", - "status", output.Status, - "duration", output.TotalDuration, - "errors", len(output.Errors), - "warnings", len(output.Warnings), + logger.Info("workflow: parse + issue mode complete", + "output_file", output.OutputFile, + "issue_url", output.FinalizationInfo.Issue.URL, + "branch", output.RepositoryInfo.BranchName, ) return output, nil } + +func createIssueWithFallback(owner, repo, title, body string) (issueURL, warning string, err error) { + issueURL, err = github.CreateIssue(owner, repo, github.CreateIssueOptions{ + Title: title, + Body: body, + Assignees: []string{"copilot"}, + Labels: []string{"copilot", "bauer"}, + }) + if err == nil { + return issueURL, "", nil + } + + // Fallback 1: some repos/orgs cannot resolve/assign the "copilot" login. + issueURL, err = github.CreateIssue(owner, repo, github.CreateIssueOptions{ + Title: title, + Body: body, + Labels: []string{"copilot", "bauer"}, + }) + if err == nil { + return issueURL, "could not assign issue to 'copilot'; created issue without assignee and kept @copilot mention in body", nil + } + + // Fallback 2: some repos don't have one or both labels. 
+ issueURL, err = github.CreateIssue(owner, repo, github.CreateIssueOptions{ + Title: title, + Body: body, + }) + if err == nil { + return issueURL, "could not assign issue to 'copilot' and one or more labels were unavailable; created issue without assignee/labels and kept @copilot mention in body", nil + } + + return "", "", err +} From 00b7524fb2a603467bcbef201c654be183fc59da Mon Sep 17 00:00:00 2001 From: Britney Wang Date: Tue, 9 Jun 2026 16:18:02 +0800 Subject: [PATCH 02/15] feat: remove dryRun flag --- cmd/bauer/main.go | 9 ++++----- internal/workflow/api.go | 27 +++++++++++++++++---------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/cmd/bauer/main.go b/cmd/bauer/main.go index 40d5eb9..38acf59 100644 --- a/cmd/bauer/main.go +++ b/cmd/bauer/main.go @@ -17,8 +17,7 @@ func main() { docID := flag.String("doc-id", "", "Google Doc ID") credentialsPath := flag.String("credentials", "bau-test-creds.json", "Path to service account credentials JSON") localRepoPath := flag.String("local-repo-path", "/tmp/ubuntu.com", "Local path for cloned repository") - dryRun := flag.Bool("dry-run", false, "Perform a dry run without creating PR") - parseOnly := flag.Bool("parse-only", false, "Phase 1: Parse document and output machine-readable JSON (skip GitHub integration)") + parseOnly := flag.Bool("parse-only", false, "Parse document and output machine-readable JSON only") outputDir := flag.String("output-dir", "bauer-output", "Output directory for Bauer results") branchPrefix := flag.String("branch-prefix", "bauer", "Branch naming prefix") @@ -43,7 +42,7 @@ func main() { // Create workflow input from CLI flags/config ghToken := "" - if !*parseOnly { + if *githubRepo != "" { var err error ghToken, err = github.GetGitHubToken() if err != nil { @@ -58,7 +57,6 @@ func main() { DocID: *docID, Credentials: *credentialsPath, LocalRepoPath: *localRepoPath, - DryRun: *dryRun, ParseOnly: *parseOnly, OutputDir: *outputDir, } @@ -78,7 +76,8 @@ func main() { 
fmt.Printf("Output file: %s\n", result.OutputFile) } else { fmt.Printf("Status: %s\n", result.Status) + fmt.Printf("Output file: %s\n", result.OutputFile) fmt.Printf("Branch: %s\n", result.RepositoryInfo.BranchName) - fmt.Printf("PR: %s\n", result.FinalizationInfo.PullRequest.URL) + fmt.Printf("Issue: %s\n", result.FinalizationInfo.Issue.URL) } } diff --git a/internal/workflow/api.go b/internal/workflow/api.go index 891eb6b..800fb8b 100644 --- a/internal/workflow/api.go +++ b/internal/workflow/api.go @@ -24,7 +24,7 @@ type APIRequest struct { PageRefresh bool `json:"page_refresh" default:"false"` // Page refresh mode OutputDir string `json:"output_dir" default:"bauer-output"` // Output directory Model string `json:"model" default:"gpt-5-mini-high"` // Copilot model - DryRun bool `json:"dry_run" default:"false"` // Dry run mode + ParseOnly bool `json:"parse_only" default:"false"` // Parse-only mode // Local repository path LocalRepoPath string `json:"local_repo_path" default:"/tmp"` // Where to clone (optional) @@ -58,11 +58,11 @@ func ExecuteWorkflowHandler(orch orchestrator.Orchestrator) http.HandlerFunc { } // Validate request - if req.GitHubRepo == "" { + if !req.ParseOnly && req.GitHubRepo == "" { writeError(w, http.StatusBadRequest, "github_repo is required") return } - if req.GitHubToken == "" { + if req.GitHubRepo != "" && req.GitHubToken == "" { writeError(w, http.StatusBadRequest, "github_token is required") return } @@ -74,7 +74,6 @@ func ExecuteWorkflowHandler(orch orchestrator.Orchestrator) http.HandlerFunc { writeError(w, http.StatusBadRequest, "credentials is required") return } - // Set defaults if req.BranchPrefix == "" { req.BranchPrefix = "bauer" @@ -103,14 +102,15 @@ func ExecuteWorkflowHandler(orch orchestrator.Orchestrator) http.HandlerFunc { PageRefresh: req.PageRefresh, OutputDir: req.OutputDir, Model: req.Model, - DryRun: req.DryRun, + ParseOnly: req.ParseOnly, LocalRepoPath: fmt.Sprintf("%s/%s-%d", req.LocalRepoPath, "bauer-workflow", 
time.Now().Unix()), } logger.Info("workflow API request", "github_repo", req.GitHubRepo, "doc_id", req.DocID, - "dry_run", req.DryRun, + "parse_only", req.ParseOnly, + "mode", map[bool]string{true: "parse-only", false: "parse-and-issue"}[req.ParseOnly], ) // Execute workflow @@ -128,10 +128,17 @@ func ExecuteWorkflowHandler(orch orchestrator.Orchestrator) http.HandlerFunc { switch workflowOutput.Status { case "success": - response.Message = fmt.Sprintf( - "Workflow completed successfully. PR: %s", - workflowOutput.FinalizationInfo.PullRequest.URL, - ) + if workflowOutput.FinalizationInfo.Issue.URL != "" { + response.Message = fmt.Sprintf( + "Workflow completed successfully. Issue: %s", + workflowOutput.FinalizationInfo.Issue.URL, + ) + } else { + response.Message = fmt.Sprintf( + "Workflow completed successfully. Output file: %s", + workflowOutput.OutputFile, + ) + } case "partial": response.Message = fmt.Sprintf( "Workflow completed with errors. Branch: %s. Errors: %d", From 072138eaeb50182b4ebcacb207d39aafe89762b2 Mon Sep 17 00:00:00 2001 From: Britney Wang Date: Tue, 9 Jun 2026 16:20:01 +0800 Subject: [PATCH 03/15] feat: add GH issue creation --- internal/github/issue.go | 97 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 internal/github/issue.go diff --git a/internal/github/issue.go b/internal/github/issue.go new file mode 100644 index 0000000..a6d5d2f --- /dev/null +++ b/internal/github/issue.go @@ -0,0 +1,97 @@ +package github + +import ( + "fmt" + "os/exec" + "strings" +) + +// CreateIssueOptions holds options for creating an issue. +type CreateIssueOptions struct { + Title string + Body string + Labels []string + Assignees []string +} + +// CreateIssue creates a GitHub issue using gh CLI. 
+func CreateIssue(owner, repo string, opts CreateIssueOptions) (string, error) { + if opts.Title == "" { + return "", fmt.Errorf("issue title is required") + } + + args := []string{ + "issue", "create", + "--repo", fmt.Sprintf("%s/%s", owner, repo), + "--title", opts.Title, + } + + if opts.Body != "" { + args = append(args, "--body", opts.Body) + } + + for _, label := range opts.Labels { + args = append(args, "--label", label) + } + + for _, assignee := range opts.Assignees { + args = append(args, "--assignee", assignee) + } + + cmd := exec.Command("gh", args...) + output, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to create issue: %w, output: %s", err, output) + } + + outputStr := string(output) + lines := strings.Split(outputStr, "\n") + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if strings.HasPrefix(trimmed, "https://github.com/") { + return trimmed, nil + } + } + + return "", fmt.Errorf("could not extract issue URL from output: %s", outputStr) +} + +// AddIssueComment posts a comment to an existing issue. +func AddIssueComment(owner, repo, issueNumber, body string) error { + if owner == "" || repo == "" || issueNumber == "" { + return fmt.Errorf("owner, repo, and issueNumber are required") + } + if strings.TrimSpace(body) == "" { + return fmt.Errorf("comment body cannot be empty") + } + + cmd := exec.Command( + "gh", "issue", "comment", issueNumber, + "--repo", fmt.Sprintf("%s/%s", owner, repo), + "--body", body, + ) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to add issue comment: %w, output: %s", err, output) + } + + return nil +} + +// ExtractIssueNumberFromURL parses issue URL like +// https://github.com/owner/repo/issues/123 and returns "123". 
+func ExtractIssueNumberFromURL(issueURL string) (string, error) { + issueURL = strings.TrimSpace(issueURL) + if issueURL == "" { + return "", fmt.Errorf("issue URL is empty") + } + parts := strings.Split(issueURL, "/") + if len(parts) == 0 { + return "", fmt.Errorf("invalid issue URL: %s", issueURL) + } + n := parts[len(parts)-1] + if n == "" { + return "", fmt.Errorf("invalid issue URL: %s", issueURL) + } + return n, nil +} From 9ea4ef56bb426421e5e61f9f9fa24e4215f3d4bc Mon Sep 17 00:00:00 2001 From: Britney Wang Date: Tue, 9 Jun 2026 16:20:29 +0800 Subject: [PATCH 04/15] feat: commit parser output to remote branch --- internal/github/remote_pr.go | 141 +++++++++++++++++++++++++++++++++++ internal/github/repo.go | 44 +++++++++++ 2 files changed, 185 insertions(+) create mode 100644 internal/github/remote_pr.go diff --git a/internal/github/remote_pr.go b/internal/github/remote_pr.go new file mode 100644 index 0000000..4e51b43 --- /dev/null +++ b/internal/github/remote_pr.go @@ -0,0 +1,141 @@ +package github + +import ( + "encoding/base64" + "fmt" + "os/exec" + "strings" + "time" +) + +// CreateParseResultPROptions configures remote PR creation from parse output. +type CreateParseResultPROptions struct { + Owner string + Repo string + BranchPrefix string + BaseBranch string + PRTitle string + PRBody string + FilePath string + FileContent []byte + CommitMsg string +} + +// CreateParseResultPR creates a branch, commits parse-result content, and opens a PR +// directly against the target repository using gh API (no local clone required). 
+func CreateParseResultPR(opts CreateParseResultPROptions) (prURL string, branchName string, err error) { + if opts.Owner == "" || opts.Repo == "" { + return "", "", fmt.Errorf("owner and repo are required") + } + if len(opts.FileContent) == 0 { + return "", "", fmt.Errorf("file content cannot be empty") + } + if opts.FilePath == "" { + opts.FilePath = "bauer-output/bauer-parse-result.json" + } + if opts.CommitMsg == "" { + opts.CommitMsg = "Add bauer parse result" + } + + if opts.BaseBranch == "" { + defaultBranch, getErr := GetDefaultBranchRemote(opts.Owner, opts.Repo) + if getErr != nil { + return "", "", fmt.Errorf("failed to resolve base branch: %w", getErr) + } + opts.BaseBranch = defaultBranch + } + + if opts.BranchPrefix == "" { + opts.BranchPrefix = "bauer" + } + + branchName = fmt.Sprintf("%s/parse-result-%d", opts.BranchPrefix, time.Now().Unix()) + + baseSHA, err := getBranchHeadSHA(opts.Owner, opts.Repo, opts.BaseBranch) + if err != nil { + return "", "", fmt.Errorf("failed to read base branch sha: %w", err) + } + + if err := createBranchRef(opts.Owner, opts.Repo, branchName, baseSHA); err != nil { + return "", "", fmt.Errorf("failed to create remote branch: %w", err) + } + + encodedContent := base64.StdEncoding.EncodeToString(opts.FileContent) + if err := putFileOnBranch(opts.Owner, opts.Repo, branchName, opts.FilePath, opts.CommitMsg, encodedContent); err != nil { + return "", "", fmt.Errorf("failed to commit parse result file: %w", err) + } + + prOpts := CreatePROptions{ + Title: opts.PRTitle, + Body: opts.PRBody, + HeadBranch: branchName, + BaseBranch: opts.BaseBranch, + } + + prURL, err = CreatePR(opts.Owner, opts.Repo, prOpts) + if err != nil { + return "", branchName, fmt.Errorf("failed to create PR: %w", err) + } + + return prURL, branchName, nil +} + +// GetDefaultBranchRemote resolves the default branch for a repository via gh api. 
+func GetDefaultBranchRemote(owner, repo string) (string, error) { + cmd := exec.Command("gh", "api", fmt.Sprintf("repos/%s/%s", owner, repo), "--jq", ".default_branch") + output, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("gh api failed: %w, output: %s", err, output) + } + branch := strings.TrimSpace(string(output)) + if branch == "" { + return "", fmt.Errorf("empty default branch response") + } + return branch, nil +} + +func getBranchHeadSHA(owner, repo, branch string) (string, error) { + cmd := exec.Command( + "gh", "api", + fmt.Sprintf("repos/%s/%s/git/ref/heads/%s", owner, repo, branch), + "--jq", ".object.sha", + ) + output, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("gh api failed: %w, output: %s", err, output) + } + sha := strings.TrimSpace(string(output)) + if sha == "" { + return "", fmt.Errorf("empty sha response for branch %s", branch) + } + return sha, nil +} + +func createBranchRef(owner, repo, branchName, sha string) error { + cmd := exec.Command( + "gh", "api", "-X", "POST", + fmt.Sprintf("repos/%s/%s/git/refs", owner, repo), + "-f", "ref=refs/heads/"+branchName, + "-f", "sha="+sha, + ) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("gh api failed: %w, output: %s", err, output) + } + return nil +} + +func putFileOnBranch(owner, repo, branch, path, message, encodedContent string) error { + cmd := exec.Command( + "gh", "api", "-X", "PUT", + fmt.Sprintf("repos/%s/%s/contents/%s", owner, repo, path), + "-f", "message="+message, + "-f", "content="+encodedContent, + "-f", "branch="+branch, + ) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("gh api failed: %w, output: %s", err, output) + } + return nil +} diff --git a/internal/github/repo.go b/internal/github/repo.go index 660cb75..002d0f7 100644 --- a/internal/github/repo.go +++ b/internal/github/repo.go @@ -199,6 +199,50 @@ func CommitChanges(localPath, message string) error { return nil } +// 
CommitFiles stages only the provided files and commits them with a message.
+//
+// Both the staged-change check and the commit are restricted to the given
+// paths, so entries that were already staged for other files are not swept
+// into this commit. It returns an error when files is empty, when staging
+// fails, when the given paths have no staged changes, or when the commit fails.
+// Paths are resolved by git relative to localPath (the command's working directory).
+func CommitFiles(localPath, message string, files []string) error {
+	if len(files) == 0 {
+		return fmt.Errorf("no files provided to commit")
+	}
+
+	// "--" ends option parsing, so a path starting with "-" is not read as a flag.
+	addArgs := append([]string{"add", "--"}, files...)
+	cmd := exec.Command("git", addArgs...)
+	cmd.Dir = localPath
+	if output, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("failed to stage files: %w, output: %s", err, output)
+	}
+
+	// Only look at the requested paths: unrelated staged entries must not make
+	// an unchanged file set look like it has something to commit.
+	diffArgs := append([]string{"diff", "--cached", "--name-only", "--"}, files...)
+	cmd = exec.Command("git", diffArgs...)
+	cmd.Dir = localPath
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("failed to inspect staged files: %w, output: %s", err, output)
+	}
+	if strings.TrimSpace(string(output)) == "" {
+		return fmt.Errorf("no staged changes to commit")
+	}
+
+	// Passing the paths to `git commit` limits the commit to exactly these files.
+	commitArgs := append([]string{"commit", "-m", message, "--"}, files...)
+	cmd = exec.Command("git", commitArgs...)
+	cmd.Dir = localPath
+	if output, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("failed to commit files: %w, output: %s", err, output)
+	}
+
+	return nil
+}
+
+// GetHeadCommitSHA returns the SHA of HEAD in the given repository.
+func GetHeadCommitSHA(localPath string) (string, error) {
+	cmd := exec.Command("git", "rev-parse", "HEAD")
+	cmd.Dir = localPath
+	// Parse stdout only. stderr is captured separately so that anything git
+	// prints there cannot end up inside the returned SHA.
+	var stderr strings.Builder
+	cmd.Stderr = &stderr
+	output, err := cmd.Output()
+	if err != nil {
+		return "", fmt.Errorf("failed to read HEAD commit SHA: %w, output: %s", err, string(output)+stderr.String())
+	}
+	return strings.TrimSpace(string(output)), nil
+}
+
 // PushBranch pushes the specified branch to remote
 func PushBranch(localPath, branchName string) error {
 	cmd := exec.Command("git", "push", "origin", branchName)
 
From 865ec63fa7015d49b15168831b57d2a5e7022dbc Mon Sep 17 00:00:00 2001
From: Britney Wang
Date: Tue, 9 Jun 2026 16:25:55 +0800
Subject: [PATCH 05/15] feat: implement issue desc builder

---
 internal/prompt/pr_description.go | 130 +++++++++++++++++++++++++
 internal/prompt/pr_description_test.go | 56 +++++++++++
 2 files changed, 186 insertions(+)
 create mode 100644 internal/prompt/pr_description.go
 create mode 100644 internal/prompt/pr_description_test.go

diff --git a/internal/prompt/pr_description.go b/internal/prompt/pr_description.go
new file mode 100644
index 0000000..8df0d34
--- /dev/null
+++ b/internal/prompt/pr_description.go
@@ -0,0 +1,130 @@
+package prompt
+
+import (
+	_ "embed"
+	"fmt"
+	"path/filepath"
+	"strings"
+
+	"bauer/internal/gdocs"
+)
+
+//go:embed templates/pr-description.md
+var prDescriptionTemplate string
+
+// BuildPRDescription renders a PR description that references the prompt templates
+// and summarizes extracted suggestions for Copilot execution.
+//
+// A nil result yields a short fallback body. Otherwise the embedded
+// pr-description.md template is filled in through replaceVar with the document
+// title and ID, the suggested URL (empty when result.Metadata is nil), the mode
+// ("page-refresh" when usePageRefresh is true, "copy-docs" otherwise) together
+// with the matching instructions template path, the list of chunk files, and
+// the location, suggestion and per-type counts.
+func BuildPRDescription(result *gdocs.ProcessingResult, chunks []ChunkResult, usePageRefresh bool) string {
+	if result == nil {
+		return "# @copilot Apply BAU Suggestions\n\nNo extraction result was available."
+	}
+
+	instructionsTemplatePath := "internal/prompt/templates/copy-docs-instructions.md"
+	mode := "copy-docs"
+	if usePageRefresh {
+		instructionsTemplatePath = "internal/prompt/templates/page-refresh-instructions.md"
+		mode = "page-refresh"
+	}
+
+	suggestedURL := ""
+	if result.Metadata != nil {
+		suggestedURL = result.Metadata.SuggestedUrl
+	}
+
+	insertCount, deleteCount, replaceCount := summarizeSuggestionTypes(result.ActionableSuggestions)
+
+	// One markdown bullet per chunk file; paths are normalized to forward slashes.
+	chunkList := "- No chunk files were generated."
+	if len(chunks) > 0 {
+		lines := make([]string, 0, len(chunks))
+		for _, chunk := range chunks {
+			chunkFile := filepath.ToSlash(chunk.Filename)
+			lines = append(lines, fmt.Sprintf("- `%s`", chunkFile))
+		}
+		chunkList = strings.Join(lines, "\n")
+	}
+
+	body := prDescriptionTemplate
+	body = replaceVar(body, "DocumentTitle", result.DocumentTitle)
+	body = replaceVar(body, "DocumentID", result.DocumentID)
+	body = replaceVar(body, "SuggestedURL", suggestedURL)
+	body = replaceVar(body, "Mode", mode)
+	body = replaceVar(body, "InstructionsTemplatePath", instructionsTemplatePath)
+	body = replaceVar(body, "PatternsTemplatePath", "internal/prompt/templates/vanilla-patterns.md")
+	body = replaceVar(body, "ChunkFiles", chunkList)
+	body = replaceVar(body, "LocationCount", fmt.Sprintf("%d", len(result.GroupedSuggestions)))
+	body = replaceVar(body, "SuggestionCount", fmt.Sprintf("%d", len(result.ActionableSuggestions)))
+	body = replaceVar(body, "InsertCount", fmt.Sprintf("%d", insertCount))
+	body = replaceVar(body, "DeleteCount", fmt.Sprintf("%d", deleteCount))
+	body = replaceVar(body, "ReplaceCount", fmt.Sprintf("%d", replaceCount))
+
+	return body
+}
+
+// BuildIssueDescription renders an issue description for Copilot.
+// Full parse JSON is posted in follow-up issue comments to avoid body size limits.
+func BuildIssueDescription(result *gdocs.ProcessingResult, chunks []ChunkResult, usePageRefresh bool) string {
+	body := BuildPRDescription(result, chunks, usePageRefresh)
+
+	// Append a fixed section that points the reader at the JSON comments.
+	var b strings.Builder
+	b.WriteString(body)
+	b.WriteString("\n\n## Parsed Output (Machine Readable)\n\n")
+	b.WriteString("Full parse JSON is posted in the issue comments below as chunked `json` blocks.\n")
+	b.WriteString("Process chunks in order (`Part 1/N`, `Part 2/N`, ...).\n")
+
+	return b.String()
+}
+
+// BuildIssueJSONComments splits parse JSON into comment-safe chunks.
+// Each chunk holds at most maxJSONPerComment bytes of JSON and is wrapped in a
+// "Parsed Output Part i/N" heading plus a fenced json block. An empty input
+// yields an empty, non-nil slice.
+func BuildIssueJSONComments(parseResultJSON string) []string {
+	const maxJSONPerComment = 45000
+	if parseResultJSON == "" {
+		return []string{}
+	}
+
+	parts := splitByLimit(parseResultJSON, maxJSONPerComment)
+	comments := make([]string, 0, len(parts))
+	for i, part := range parts {
+		comments = append(comments,
+			fmt.Sprintf("### Parsed Output Part %d/%d\n\n```json\n%s\n```", i+1, len(parts), part),
+		)
+	}
+
+	return comments
+}
+
+// splitByLimit cuts s into consecutive parts of at most limit bytes each.
+// Concatenating the parts reproduces s exactly.
+//
+// A part ends at the last newline inside the window when there is one (the
+// newline itself starts the next part). Otherwise the cut is moved back to the
+// nearest UTF-8 rune boundary so that no multi-byte character is torn in two.
+// A non-positive limit returns s as a single part instead of looping forever.
+func splitByLimit(s string, limit int) []string {
+	if limit <= 0 || len(s) <= limit {
+		return []string{s}
+	}
+
+	var parts []string
+	for len(s) > limit {
+		cut := limit
+		if idx := strings.LastIndex(s[:limit], "\n"); idx > 0 {
+			cut = idx
+		} else {
+			// 10xxxxxx marks a UTF-8 continuation byte: step back until the
+			// part ends just before the first byte of a rune.
+			for cut > 0 && s[cut]&0xC0 == 0x80 {
+				cut--
+			}
+			if cut == 0 {
+				// No rune boundary inside the window (malformed input or a
+				// limit shorter than one rune): fall back to a plain byte cut
+				// so the loop always makes progress.
+				cut = limit
+			}
+		}
+		parts = append(parts, s[:cut])
+		s = s[cut:]
+	}
+
+	return append(parts, s)
+}
+
+// summarizeSuggestionTypes counts the suggestions whose Change.Type is
+// "insert", "delete" or "replace". Any other type is ignored.
+func summarizeSuggestionTypes(suggestions []gdocs.ActionableSuggestion) (insertCount, deleteCount, replaceCount int) {
+	for _, s := range suggestions {
+		switch s.Change.Type {
+		case "insert":
+			insertCount++
+		case "delete":
+			deleteCount++
+		case "replace":
+			replaceCount++
+		}
+	}
+	return insertCount, deleteCount, replaceCount
+}
diff --git a/internal/prompt/pr_description_test.go b/internal/prompt/pr_description_test.go
new file mode 100644
index 0000000..64ee9b0
--- /dev/null
+++ b/internal/prompt/pr_description_test.go
@@ -0,0 +1,56 @@
+package prompt
+
+import (
+	"testing"
+
+	"bauer/internal/gdocs"
+)
+
+func 
TestBuildPRDescription_IncludesTemplateReferences(t *testing.T) {
+	// Fixture: two grouped locations and one suggestion of each change type.
+	result := &gdocs.ProcessingResult{
+		DocumentTitle: "Copy of example.com/page",
+		DocumentID:    "doc-123",
+		Metadata: &gdocs.MetadataTable{
+			SuggestedUrl: "example.com/page",
+		},
+		GroupedSuggestions: []gdocs.LocationGroupedSuggestions{
+			{},
+			{},
+		},
+		ActionableSuggestions: []gdocs.ActionableSuggestion{
+			{Change: gdocs.SuggestionChange{Type: "insert"}},
+			{Change: gdocs.SuggestionChange{Type: "delete"}},
+			{Change: gdocs.SuggestionChange{Type: "replace"}},
+		},
+	}
+
+	// usePageRefresh=false selects the copy-docs instructions template.
+	chunks := []ChunkResult{{Filename: "bauer-output/chunk-1-of-1.md"}}
+	body := BuildPRDescription(result, chunks, false)
+
+	// Substrings the rendered description must contain. The two count labels
+	// presumably come from the embedded template, which is not visible here.
+	expected := []string{
+		"@copilot",
+		"internal/prompt/templates/copy-docs-instructions.md",
+		"internal/prompt/templates/vanilla-patterns.md",
+		"`bauer-output/chunk-1-of-1.md`",
+		"Grouped locations: 2",
+		"Atomic actionable suggestions: 3",
+	}
+
+	// contains is a helper defined elsewhere in the package (not shown in this patch).
+	for _, e := range expected {
+		if !contains(body, e) {
+			t.Fatalf("expected PR description to contain %q", e)
+		}
+	}
+}
+
+// TestBuildPRDescription_PageRefreshTemplateReference checks that passing
+// usePageRefresh=true makes the description reference the page-refresh
+// instructions template. It also exercises the nil Metadata and nil chunks paths.
+func TestBuildPRDescription_PageRefreshTemplateReference(t *testing.T) {
+	result := &gdocs.ProcessingResult{
+		DocumentTitle: "Doc",
+		DocumentID:    "doc-abc",
+	}
+
+	body := BuildPRDescription(result, nil, true)
+	if !contains(body, "internal/prompt/templates/page-refresh-instructions.md") {
+		t.Fatalf("expected page refresh template reference in PR description")
+	}
+}

From ec8326682f13c253d97c8565889e83670d362bce Mon Sep 17 00:00:00 2001
From: Britney Wang
Date: Tue, 9 Jun 2026 16:26:10 +0800
Subject: [PATCH 06/15] docs: update docs with new flags

---
 README.md | 36 ++++++++++++++++++++++++++++--------
 internal/prompt/README.md | 3 ++-
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index d0cf165..0c88046 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ cp credentials.example.json credentials.json
 ## Usage
 
 1. Build Bauer locally using the Local development steps above (`task build`)
-2. 
If running with GitHub integration (no `--parse-only`), ensure `copilot` is installed and authenticated +2. If running with GitHub issue creation (no `--parse-only`), ensure GitHub CLI auth is available 3. Get document ID from Google Document & share the document with the service account 4. Run Bauer @@ -49,17 +49,29 @@ cp credentials.example.json credentials.json 5. Optional parameters -| Flag | Type | Default | Description | Requires Copilot | -| ------------------ | ------ | -------------------- | ------------------------------------------------------------------------------- | ---------------- | +| Flag | Type | Default | Description | Requires GitHub Auth | +| ------------------ | ------ | -------------------- | ------------------------------------------------------------------------------- | -------------------- | | `--github-repo` | string | (required if not parse-only) | GitHub repository (owner/repo or HTTPS URL) | Yes* | | `--credentials` | string | `bau-test-creds.json` | Path to service account credentials JSON | No | | `--local-repo-path` | string | `/tmp/ubuntu.com` | Local path for cloned repository | No | -| `--dry-run` | bool | `false` | Perform a dry run without creating PR | Yes* | | `--output-dir` | string | `bauer-output` | Output directory for Bauer results | No | | `--branch-prefix` | string | `bauer` | Branch naming prefix | No | -| `--parse-only` | bool | `false` | Parse document and output machine-readable JSON (skip GitHub integration) | No | +| `--parse-only` | bool | `false` | Parse document and output machine-readable JSON only | No | + +Current execution modes: + +1. Parse-only mode (`--parse-only`) +- Creates `bauer-output/bauer-parse-result.json` +- Does not push branches +- Does not create issues + +2. 
Parse-and-issue mode (without `--parse-only`) +- Creates `bauer-output/bauer-parse-result.json` +- Creates a branch and pushes the parse file into that branch +- Opens a GitHub issue assigned to Copilot (with fallbacks) and includes branch/pinned/raw links to the prompt file + +*GitHub auth is only required when using parse-and-issue mode. -*These flags require Copilot integration to be configured when performing GitHub operations (not needed for `--parse-only`)